/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

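/* For illustration, a sketch of the kind of transformation this pass
   performs (hand-written, not actual compiler output):

     int a[100];                         int a[100];
     for (i = 0; i < n; i++)     ==>     for (p = a; p != a + n; p++)
       a[i] = 0;                           *p = 0;

   The address computation a + 4 * i is strength-reduced to a pointer
   that is simply incremented, and the original counter i may be
   eliminated because the exit test can be expressed using the pointer.  */
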
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* If so, the id of that use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
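
/* For example (a sketch): in  a[i] = i * i;  the use of i in the
   multiplication is a USE_NONLINEAR_EXPR, its use in the address a[i]
   is a USE_ADDRESS, and its use in an exit test  i < n  is a
   USE_COMPARE.  */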

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units -- the
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for a
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider all candidates when replacing a use (otherwise
     just the related and important ones are considered).  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
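
/* E.g. (a sketch): for a loop whose only exit test is executed on every
   iteration, that exit edge is returned; if the loop has several exits,
   or its single exit is inside a conditional that is not executed on
   each iteration, NULL is returned.  */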

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, so that
	 we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
		           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
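
/* For illustration (hypothetical operands): for EXPR = &a[i].x the result
   is &a; for EXPR = p_1 + 16 it is the base object of p_1; and for a
   non-pointer EXPR such as (long) n_2 the result is NULL_TREE, since the
   expression does not point anywhere.  */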

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
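
/* E.g. (a sketch): for EXPR = &a[0] this returns true, since the operand
   of the ADDR_EXPR is an ARRAY_REF rather than a declaration; for
   EXPR = &a it returns false.  Sums and differences are searched
   recursively.  */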

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in base except for those with a DECL_P
     operand.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
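
/* For instance (a sketch in GIMPLE-like form, with hypothetical names),
   given the header phi of a counted loop

     i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 4;

   the step returned is 4; for a phi whose result does not form a simple
   affine iv, or whose step is zero, NULL_TREE is returned.  */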

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
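
/* E.g. (hypothetical operands): for EXPR = n_1 * 4 + m_2 this returns
   n_1, the first ssa var found in a depth-first walk; for a constant
   EXPR it returns NULL.  */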

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var, but since that is
	 expensive and such cases are unusual, we just do it for the first
	 one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but since that is
     expensive and such cases are unusual, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, to
     avoid showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when STMT has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
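
/* E.g. (a sketch with hypothetical names): for the exit test
   if (i_1 < n_2), where i_1 is an iv with nonzero step and n_2 is loop
   invariant, *CONTROL_VAR is set to point to i_1, *BOUND to n_2, and
   true is returned; the operands are swapped first if the iv appears
   on the right-hand side.  */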

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
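
/* E.g. (a sketch): for an access a[i_1] where the elements are 4 bytes
   wide and i_1 is an iv with base 0 and step 2, the index is replaced by
   its initial value 0, and 4 * 2 = 8 bytes are added to DTA->step, the
   per-iteration step of the whole address.  */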

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
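
/* Worked examples (hypothetical operands): with TOP = n_1 * 4 and
   BOT = n_1, *MUL is set to 4 and true is returned; with TOP = 24 and
   BOT = 8, *MUL is set to 3; with TOP = n_1 + 4 and BOT = n_1, false is
   returned, since the ratio is not a constant.  */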

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
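
/* E.g. (a sketch): for a reference of a type requiring 64-bit alignment
   and a STEP of 4 bytes, the step has two trailing zero bits, so only
   4 * BITS_PER_UNIT = 32 bits of alignment are guaranteed to be
   preserved across iterations and true is returned.  */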

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
1956
1957/* Finds and records invariants used in STMT.  */
1958
1959static void
1960find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1961{
1962  ssa_op_iter iter;
1963  use_operand_p use_p;
1964  tree op;
1965
1966  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1967    {
1968      op = USE_FROM_PTR (use_p);
1969      record_invariant (data, op, false);
1970    }
1971}
1972
1973/* Finds interesting uses of induction variables in the statement STMT.  */
1974
1975static void
1976find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1977{
1978  struct iv *iv;
1979  tree op, *lhs, *rhs;
1980  ssa_op_iter iter;
1981  use_operand_p use_p;
1982  enum tree_code code;
1983
1984  find_invariants_stmt (data, stmt);
1985
1986  if (gimple_code (stmt) == GIMPLE_COND)
1987    {
1988      find_interesting_uses_cond (data, stmt);
1989      return;
1990    }
1991
1992  if (is_gimple_assign (stmt))
1993    {
1994      lhs = gimple_assign_lhs_ptr (stmt);
1995      rhs = gimple_assign_rhs1_ptr (stmt);
1996
1997      if (TREE_CODE (*lhs) == SSA_NAME)
1998	{
1999	  /* If the statement defines an induction variable, the uses are not
2000	     interesting by themselves.  */
2001
2002	  iv = get_iv (data, *lhs);
2003
2004	  if (iv && !integer_zerop (iv->step))
2005	    return;
2006	}
2007
2008      code = gimple_assign_rhs_code (stmt);
2009      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2010	  && (REFERENCE_CLASS_P (*rhs)
2011	      || is_gimple_val (*rhs)))
2012	{
2013	  if (REFERENCE_CLASS_P (*rhs))
2014	    find_interesting_uses_address (data, stmt, rhs);
2015	  else
2016	    find_interesting_uses_op (data, *rhs);
2017
2018	  if (REFERENCE_CLASS_P (*lhs))
2019	    find_interesting_uses_address (data, stmt, lhs);
2020	  return;
2021	}
2022      else if (TREE_CODE_CLASS (code) == tcc_comparison)
2023	{
2024	  find_interesting_uses_cond (data, stmt);
2025	  return;
2026	}
2027
2028      /* TODO -- we should also handle address uses of type
2029
2030	 memory = call (whatever);
2031
2032	 and
2033
2034	 call (memory).  */
2035    }
2036
2037  if (gimple_code (stmt) == GIMPLE_PHI
2038      && gimple_bb (stmt) == data->current_loop->header)
2039    {
2040      iv = get_iv (data, PHI_RESULT (stmt));
2041
2042      if (iv && !integer_zerop (iv->step))
2043	return;
2044    }
2045
2046  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2047    {
2048      op = USE_FROM_PTR (use_p);
2049
2050      if (TREE_CODE (op) != SSA_NAME)
2051	continue;
2052
2053      iv = get_iv (data, op);
2054      if (!iv)
2055	continue;
2056
2057      find_interesting_uses_op (data, op);
2058    }
2059}
2060
2061/* Finds interesting uses of induction variables outside of loops
2062   on loop exit edge EXIT.  */
2063
2064static void
2065find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2066{
2067  gphi *phi;
2068  gphi_iterator psi;
2069  tree def;
2070
2071  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2072    {
2073      phi = psi.phi ();
2074      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2075      if (!virtual_operand_p (def))
2076        find_interesting_uses_op (data, def);
2077    }
2078}
2079
2080/* Finds uses of the induction variables that are interesting.  */
2081
2082static void
2083find_interesting_uses (struct ivopts_data *data)
2084{
2085  basic_block bb;
2086  gimple_stmt_iterator bsi;
2087  basic_block *body = get_loop_body (data->current_loop);
2088  unsigned i;
2089  struct version_info *info;
2090  edge e;
2091
2092  if (dump_file && (dump_flags & TDF_DETAILS))
2093    fprintf (dump_file, "Uses:\n\n");
2094
2095  for (i = 0; i < data->current_loop->num_nodes; i++)
2096    {
2097      edge_iterator ei;
2098      bb = body[i];
2099
2100      FOR_EACH_EDGE (e, ei, bb->succs)
2101	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2102	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2103	  find_interesting_uses_outside (data, e);
2104
2105      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2106	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2107      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2108	if (!is_gimple_debug (gsi_stmt (bsi)))
2109	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2110    }
2111
2112  if (dump_file && (dump_flags & TDF_DETAILS))
2113    {
2114      bitmap_iterator bi;
2115
2116      fprintf (dump_file, "\n");
2117
2118      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2119	{
2120	  info = ver_info (data, i);
2121	  if (info->inv_id)
2122	    {
2123	      fprintf (dump_file, "  ");
2124	      print_generic_expr (dump_file, info->name, TDF_SLIM);
2125	      fprintf (dump_file, " is invariant (%d)%s\n",
2126		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2127	    }
2128	}
2129
2130      fprintf (dump_file, "\n");
2131    }
2132
2133  free (body);
2134}
2135
/* Strips constant offsets from EXPR and stores them in *OFFSET.  If
   INSIDE_ADDR is true, assume we are inside an address.  If TOP_COMPREF
   is true, assume we are at the top level of the processed address.  */
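
/* For instance (a hypothetical example assuming a 4-byte element type),
   stripping offsets from

     &a[i + 2]

   yields the core expression &a[i] and *OFFSET == 8; the ARRAY_REF and
   ADDR_EXPR cases below cooperate to peel off the constant part.  */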
2139
2140static tree
2141strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2142		HOST_WIDE_INT *offset)
2143{
2144  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2145  enum tree_code code;
2146  tree type, orig_type = TREE_TYPE (expr);
2147  HOST_WIDE_INT off0, off1, st;
2148  tree orig_expr = expr;
2149
2150  STRIP_NOPS (expr);
2151
2152  type = TREE_TYPE (expr);
2153  code = TREE_CODE (expr);
2154  *offset = 0;
2155
2156  switch (code)
2157    {
2158    case INTEGER_CST:
2159      if (!cst_and_fits_in_hwi (expr)
2160	  || integer_zerop (expr))
2161	return orig_expr;
2162
2163      *offset = int_cst_value (expr);
2164      return build_int_cst (orig_type, 0);
2165
2166    case POINTER_PLUS_EXPR:
2167    case PLUS_EXPR:
2168    case MINUS_EXPR:
2169      op0 = TREE_OPERAND (expr, 0);
2170      op1 = TREE_OPERAND (expr, 1);
2171
2172      op0 = strip_offset_1 (op0, false, false, &off0);
2173      op1 = strip_offset_1 (op1, false, false, &off1);
2174
2175      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2176      if (op0 == TREE_OPERAND (expr, 0)
2177	  && op1 == TREE_OPERAND (expr, 1))
2178	return orig_expr;
2179
2180      if (integer_zerop (op1))
2181	expr = op0;
2182      else if (integer_zerop (op0))
2183	{
2184	  if (code == MINUS_EXPR)
2185	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2186	  else
2187	    expr = op1;
2188	}
2189      else
2190	expr = fold_build2 (code, type, op0, op1);
2191
2192      return fold_convert (orig_type, expr);
2193
2194    case MULT_EXPR:
2195      op1 = TREE_OPERAND (expr, 1);
2196      if (!cst_and_fits_in_hwi (op1))
2197	return orig_expr;
2198
2199      op0 = TREE_OPERAND (expr, 0);
2200      op0 = strip_offset_1 (op0, false, false, &off0);
2201      if (op0 == TREE_OPERAND (expr, 0))
2202	return orig_expr;
2203
2204      *offset = off0 * int_cst_value (op1);
2205      if (integer_zerop (op0))
2206	expr = op0;
2207      else
2208	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2209
2210      return fold_convert (orig_type, expr);
2211
2212    case ARRAY_REF:
2213    case ARRAY_RANGE_REF:
2214      if (!inside_addr)
2215	return orig_expr;
2216
2217      step = array_ref_element_size (expr);
2218      if (!cst_and_fits_in_hwi (step))
2219	break;
2220
2221      st = int_cst_value (step);
2222      op1 = TREE_OPERAND (expr, 1);
2223      op1 = strip_offset_1 (op1, false, false, &off1);
2224      *offset = off1 * st;
2225
2226      if (top_compref
2227	  && integer_zerop (op1))
2228	{
2229	  /* Strip the component reference completely.  */
2230	  op0 = TREE_OPERAND (expr, 0);
2231	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2232	  *offset += off0;
2233	  return op0;
2234	}
2235      break;
2236
2237    case COMPONENT_REF:
2238      {
2239	tree field;
2240
2241	if (!inside_addr)
2242	  return orig_expr;
2243
2244	tmp = component_ref_field_offset (expr);
2245	field = TREE_OPERAND (expr, 1);
2246	if (top_compref
2247	    && cst_and_fits_in_hwi (tmp)
2248	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2249	  {
2250	    HOST_WIDE_INT boffset, abs_off;
2251
2252	    /* Strip the component reference completely.  */
2253	    op0 = TREE_OPERAND (expr, 0);
2254	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2255	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2256	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2257	    if (boffset < 0)
2258	      abs_off = -abs_off;
2259
2260	    *offset = off0 + int_cst_value (tmp) + abs_off;
2261	    return op0;
2262	  }
2263      }
2264      break;
2265
2266    case ADDR_EXPR:
2267      op0 = TREE_OPERAND (expr, 0);
2268      op0 = strip_offset_1 (op0, true, true, &off0);
2269      *offset += off0;
2270
2271      if (op0 == TREE_OPERAND (expr, 0))
2272	return orig_expr;
2273
2274      expr = build_fold_addr_expr (op0);
2275      return fold_convert (orig_type, expr);
2276
2277    case MEM_REF:
2278      /* ???  Offset operand?  */
2279      inside_addr = false;
2280      break;
2281
2282    default:
2283      return orig_expr;
2284    }
2285
  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
2288  op0 = TREE_OPERAND (expr, 0);
2289  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2290  *offset += off0;
2291
2292  if (op0 == TREE_OPERAND (expr, 0)
2293      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2294    return orig_expr;
2295
2296  expr = copy_node (expr);
2297  TREE_OPERAND (expr, 0) = op0;
2298  if (op1)
2299    TREE_OPERAND (expr, 1) = op1;
2300
  /* Inside an address, we might strip top-level component references,
     thus changing the type of the expression.  The handling of ADDR_EXPR
     will fix that.  */
2304  expr = fold_convert (orig_type, expr);
2305
2306  return expr;
2307}
2308
/* Strips constant offsets from EXPR and stores them in *OFFSET.  */
2310
2311static tree
2312strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2313{
2314  HOST_WIDE_INT off;
2315  tree core = strip_offset_1 (expr, false, false, &off);
2316  *offset = off;
2317  return core;
2318}
2319
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */
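
/* For example, on a typical target where "int" and pointers are 32 bits
   wide (an illustrative assumption), both "int" and "int *" map to
   "unsigned int", while an already unsigned type is returned unchanged.  */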
2323
2324static tree
2325generic_type_for (tree type)
2326{
2327  if (POINTER_TYPE_P (type))
2328    return unsigned_type_for (type);
2329
2330  if (TYPE_UNSIGNED (type))
2331    return type;
2332
2333  return unsigned_type_for (type);
2334}
2335
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */
2338
2339static struct ivopts_data *fd_ivopts_data;
2340static tree
2341find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2342{
2343  bitmap *depends_on = (bitmap *) data;
2344  struct version_info *info;
2345
2346  if (TREE_CODE (*expr_p) != SSA_NAME)
2347    return NULL_TREE;
2348  info = name_info (fd_ivopts_data, *expr_p);
2349
2350  if (!info->inv_id || info->has_nonlin_use)
2351    return NULL_TREE;
2352
2353  if (!*depends_on)
2354    *depends_on = BITMAP_ALLOC (NULL);
2355  bitmap_set_bit (*depends_on, info->inv_id);
2356
2357  return NULL_TREE;
2358}
2359
2360/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2361   position to POS.  If USE is not NULL, the candidate is set as related to
2362   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2363   replacement of the final value of the iv by a direct computation.  */
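
/* As a hypothetical illustration, for a loop indexing an array as "a[i]"
   the candidates accumulated here may include {base = i_0, step = 1} (the
   original iv), {base = 0, step = 1} (the same with initial value zero)
   and {base = &a[i_0], step = 4} (a pointer iv derived from the address
   use, assuming 4-byte elements), which later compete on cost.  */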
2364
2365static struct iv_cand *
2366add_candidate_1 (struct ivopts_data *data,
2367		 tree base, tree step, bool important, enum iv_position pos,
2368		 struct iv_use *use, gimple incremented_at)
2369{
2370  unsigned i;
2371  struct iv_cand *cand = NULL;
2372  tree type, orig_type;
2373
2374  /* For non-original variables, make sure their values are computed in a type
2375     that does not invoke undefined behavior on overflows (since in general,
2376     we cannot prove that these induction variables are non-wrapping).  */
2377  if (pos != IP_ORIGINAL)
2378    {
2379      orig_type = TREE_TYPE (base);
2380      type = generic_type_for (orig_type);
2381      if (type != orig_type)
2382	{
2383	  base = fold_convert (type, base);
2384	  step = fold_convert (type, step);
2385	}
2386    }
2387
2388  for (i = 0; i < n_iv_cands (data); i++)
2389    {
2390      cand = iv_cand (data, i);
2391
2392      if (cand->pos != pos)
2393	continue;
2394
2395      if (cand->incremented_at != incremented_at
2396	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2397	      && cand->ainc_use != use))
2398	continue;
2399
2400      if (!cand->iv)
2401	{
2402	  if (!base && !step)
2403	    break;
2404
2405	  continue;
2406	}
2407
2408      if (!base && !step)
2409	continue;
2410
2411      if (operand_equal_p (base, cand->iv->base, 0)
2412	  && operand_equal_p (step, cand->iv->step, 0)
2413          && (TYPE_PRECISION (TREE_TYPE (base))
2414              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2415	break;
2416    }
2417
2418  if (i == n_iv_cands (data))
2419    {
2420      cand = XCNEW (struct iv_cand);
2421      cand->id = i;
2422
2423      if (!base && !step)
2424	cand->iv = NULL;
2425      else
2426	cand->iv = alloc_iv (base, step);
2427
2428      cand->pos = pos;
2429      if (pos != IP_ORIGINAL && cand->iv)
2430	{
2431	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2432	  cand->var_after = cand->var_before;
2433	}
2434      cand->important = important;
2435      cand->incremented_at = incremented_at;
2436      data->iv_candidates.safe_push (cand);
2437
2438      if (step
2439	  && TREE_CODE (step) != INTEGER_CST)
2440	{
2441	  fd_ivopts_data = data;
2442	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2443	}
2444
2445      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2446	cand->ainc_use = use;
2447      else
2448	cand->ainc_use = NULL;
2449
2450      if (dump_file && (dump_flags & TDF_DETAILS))
2451	dump_cand (dump_file, cand);
2452    }
2453
2454  if (important && !cand->important)
2455    {
2456      cand->important = true;
2457      if (dump_file && (dump_flags & TDF_DETAILS))
2458	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2459    }
2460
2461  if (use)
2462    {
2463      bitmap_set_bit (use->related_cands, i);
2464      if (dump_file && (dump_flags & TDF_DETAILS))
2465	fprintf (dump_file, "Candidate %d is related to use %d\n",
2466		 cand->id, use->id);
2467    }
2468
2469  return cand;
2470}
2471
2472/* Returns true if incrementing the induction variable at the end of the LOOP
2473   is allowed.
2474
   The purpose is to avoid splitting the latch edge with a biv increment,
   thus creating a jump, possibly confusing other optimization passes and
   leaving less freedom to the scheduler.  So we allow IP_END_POS only if
   IP_NORMAL_POS is not available (so we do not have a better alternative),
   or if the latch edge is already nonempty.  */
2480
2481static bool
2482allow_ip_end_pos_p (struct loop *loop)
2483{
2484  if (!ip_normal_pos (loop))
2485    return true;
2486
2487  if (!empty_block_p (ip_end_pos (loop)))
2488    return true;
2489
2490  return false;
2491}
2492
2493/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2494   Important field is set to IMPORTANT.  */
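
/* For instance, on a target with post-increment addressing (an assumption
   for illustration), an address use "*p" whose iv advances by the mode size
   each iteration can be covered by an IP_AFTER_USE candidate, so that the
   memory access and the increment combine into a single instruction.  */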
2495
2496static void
2497add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2498			bool important, struct iv_use *use)
2499{
2500  basic_block use_bb = gimple_bb (use->stmt);
2501  machine_mode mem_mode;
2502  unsigned HOST_WIDE_INT cstepi;
2503
  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or on one side of an if
     statement.  */
2508  if (use_bb->loop_father != data->current_loop
2509      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2510      || stmt_could_throw_p (use->stmt)
2511      || !cst_and_fits_in_hwi (step))
2512    return;
2513
2514  cstepi = int_cst_value (step);
2515
2516  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2517  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2518	|| USE_STORE_PRE_INCREMENT (mem_mode))
2519       && GET_MODE_SIZE (mem_mode) == cstepi)
2520      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2521	   || USE_STORE_PRE_DECREMENT (mem_mode))
2522	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2523    {
2524      enum tree_code code = MINUS_EXPR;
2525      tree new_base;
2526      tree new_step = step;
2527
2528      if (POINTER_TYPE_P (TREE_TYPE (base)))
2529	{
2530	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2531	  code = POINTER_PLUS_EXPR;
2532	}
2533      else
2534	new_step = fold_convert (TREE_TYPE (base), new_step);
2535      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2536      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2537		       use->stmt);
2538    }
2539  if (((USE_LOAD_POST_INCREMENT (mem_mode)
2540	|| USE_STORE_POST_INCREMENT (mem_mode))
2541       && GET_MODE_SIZE (mem_mode) == cstepi)
2542      || ((USE_LOAD_POST_DECREMENT (mem_mode)
2543	   || USE_STORE_POST_DECREMENT (mem_mode))
2544	  && GET_MODE_SIZE (mem_mode) == -cstepi))
2545    {
2546      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2547		       use->stmt);
2548    }
2549}
2550
/* Adds a candidate BASE + STEP * i.  The important field is set to
   IMPORTANT.  If USE is not NULL, the candidate is set as related to it.
   The candidate computation is scheduled at all available positions.  */
2554
2555static void
2556add_candidate (struct ivopts_data *data,
2557	       tree base, tree step, bool important, struct iv_use *use)
2558{
2559  if (ip_normal_pos (data->current_loop))
2560    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2561  if (ip_end_pos (data->current_loop)
2562      && allow_ip_end_pos_p (data->current_loop))
2563    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2564
2565  if (use != NULL && use->type == USE_ADDRESS)
2566    add_autoinc_candidates (data, base, step, important, use);
2567}
2568
2569/* Adds standard iv candidates.  */
2570
2571static void
2572add_standard_iv_candidates (struct ivopts_data *data)
2573{
2574  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2575
  /* The same for the long type if it is wider and still fast enough.  */
2577  if (TYPE_PRECISION
2578        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2579      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2580    add_candidate (data, build_int_cst (long_integer_type_node, 0),
2581		   build_int_cst (long_integer_type_node, 1), true, NULL);
2582
  /* And for the long long type if it is wider and still fast enough.  */
2584  if (TYPE_PRECISION
2585        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2586      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2587    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2588		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
2589}
2590
2591
/* Adds candidates based on the old induction variable IV.  */
2593
2594static void
2595add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2596{
2597  gimple phi;
2598  tree def;
2599  struct iv_cand *cand;
2600
2601  add_candidate (data, iv->base, iv->step, true, NULL);
2602
2603  /* The same, but with initial value zero.  */
2604  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2605    add_candidate (data, size_int (0), iv->step, true, NULL);
2606  else
2607    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2608		   iv->step, true, NULL);
2609
2610  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2611  if (gimple_code (phi) == GIMPLE_PHI)
2612    {
2613      /* Additionally record the possibility of leaving the original iv
2614	 untouched.  */
2615      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2616      /* Don't add candidate if it's from another PHI node because
2617	 it's an affine iv appearing in the form of PEELED_CHREC.  */
2618      phi = SSA_NAME_DEF_STMT (def);
2619      if (gimple_code (phi) != GIMPLE_PHI)
2620	{
2621	  cand = add_candidate_1 (data,
2622				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
2623				  SSA_NAME_DEF_STMT (def));
2624	  cand->var_before = iv->ssa_name;
2625	  cand->var_after = def;
2626	}
2627      else
2628	gcc_assert (gimple_bb (phi) == data->current_loop->header);
2629    }
2630}
2631
2632/* Adds candidates based on the old induction variables.  */
2633
2634static void
2635add_old_ivs_candidates (struct ivopts_data *data)
2636{
2637  unsigned i;
2638  struct iv *iv;
2639  bitmap_iterator bi;
2640
2641  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2642    {
2643      iv = ver_info (data, i)->iv;
2644      if (iv && iv->biv_p && !integer_zerop (iv->step))
2645	add_old_iv_candidates (data, iv);
2646    }
2647}
2648
2649/* Adds candidates based on the value of the induction variable IV and USE.  */
2650
2651static void
2652add_iv_value_candidates (struct ivopts_data *data,
2653			 struct iv *iv, struct iv_use *use)
2654{
2655  unsigned HOST_WIDE_INT offset;
2656  tree base;
2657  tree basetype;
2658
2659  add_candidate (data, iv->base, iv->step, false, use);
2660
  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that possibly many uses may be based
     on it.  */
2664  basetype = TREE_TYPE (iv->base);
2665  if (POINTER_TYPE_P (basetype))
2666    basetype = sizetype;
2667  add_candidate (data, build_int_cst (basetype, 0),
2668		 iv->step, true, use);
2669
  /* Third, try removing the constant offset.  Make sure to add a candidate
     even for &a[0] vs. (T *)&a.  */
2672  base = strip_offset (iv->base, &offset);
2673  if (offset
2674      || base != iv->base)
2675    add_candidate (data, base, iv->step, false, use);
2676}
2677
2678/* Adds candidates based on the uses.  */
2679
2680static void
2681add_derived_ivs_candidates (struct ivopts_data *data)
2682{
2683  unsigned i;
2684
2685  for (i = 0; i < n_iv_uses (data); i++)
2686    {
2687      struct iv_use *use = iv_use (data, i);
2688
2689      if (!use)
2690	continue;
2691
2692      switch (use->type)
2693	{
2694	case USE_NONLINEAR_EXPR:
2695	case USE_COMPARE:
2696	case USE_ADDRESS:
2697	  /* Just add the ivs based on the value of the iv used here.  */
2698	  add_iv_value_candidates (data, use->iv, use);
2699	  break;
2700
2701	default:
2702	  gcc_unreachable ();
2703	}
2704    }
2705}
2706
2707/* Record important candidates and add them to related_cands bitmaps
2708   if needed.  */
2709
2710static void
2711record_important_candidates (struct ivopts_data *data)
2712{
2713  unsigned i;
2714  struct iv_use *use;
2715
2716  for (i = 0; i < n_iv_cands (data); i++)
2717    {
2718      struct iv_cand *cand = iv_cand (data, i);
2719
2720      if (cand->important)
2721	bitmap_set_bit (data->important_candidates, i);
2722    }
2723
2724  data->consider_all_candidates = (n_iv_cands (data)
2725				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2726
2727  if (data->consider_all_candidates)
2728    {
2729      /* We will not need "related_cands" bitmaps in this case,
2730	 so release them to decrease peak memory consumption.  */
2731      for (i = 0; i < n_iv_uses (data); i++)
2732	{
2733	  use = iv_use (data, i);
2734	  BITMAP_FREE (use->related_cands);
2735	}
2736    }
2737  else
2738    {
2739      /* Add important candidates to the related_cands bitmaps.  */
2740      for (i = 0; i < n_iv_uses (data); i++)
2741	bitmap_ior_into (iv_use (data, i)->related_cands,
2742			 data->important_candidates);
2743    }
2744}
2745
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array;
   otherwise we allocate a simple hash table for every use.  */
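
/* E.g., a use related to five candidates gets a map rounded up to eight
   entries, so that set_use_iv_cost and get_use_iv_cost below can hash a
   candidate id cheaply by masking it with n_map_members - 1.  */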
2749
2750static void
2751alloc_use_cost_map (struct ivopts_data *data)
2752{
2753  unsigned i, size, s;
2754
2755  for (i = 0; i < n_iv_uses (data); i++)
2756    {
2757      struct iv_use *use = iv_use (data, i);
2758
2759      if (data->consider_all_candidates)
2760	size = n_iv_cands (data);
2761      else
2762	{
2763	  s = bitmap_count_bits (use->related_cands);
2764
	  /* Round up to a power of two, so that computing the modulus by it
	     is fast.  */
2766	  size = s ? (1 << ceil_log2 (s)) : 1;
2767	}
2768
2769      use->n_map_members = size;
2770      use->cost_map = XCNEWVEC (struct cost_pair, size);
2771    }
2772}
2773
/* Returns a description of the computation cost of an expression whose
   runtime cost is RUNTIME and whose complexity is COMPLEXITY.  */
2776
2777static comp_cost
2778new_cost (unsigned runtime, unsigned complexity)
2779{
2780  comp_cost cost;
2781
2782  cost.cost = runtime;
2783  cost.complexity = complexity;
2784
2785  return cost;
2786}
2787
2788/* Adds costs COST1 and COST2.  */
2789
2790static comp_cost
2791add_costs (comp_cost cost1, comp_cost cost2)
2792{
2793  cost1.cost += cost2.cost;
2794  cost1.complexity += cost2.complexity;
2795
2796  return cost1;
}

/* Subtracts cost COST2 from COST1.  */
2799
2800static comp_cost
2801sub_costs (comp_cost cost1, comp_cost cost2)
2802{
2803  cost1.cost -= cost2.cost;
2804  cost1.complexity -= cost2.complexity;
2805
2806  return cost1;
2807}
2808
2809/* Returns a negative number if COST1 < COST2, a positive number if
2810   COST1 > COST2, and 0 if COST1 = COST2.  */
2811
2812static int
2813compare_costs (comp_cost cost1, comp_cost cost2)
2814{
2815  if (cost1.cost == cost2.cost)
2816    return cost1.complexity - cost2.complexity;
2817
2818  return cost1.cost - cost2.cost;
2819}
2820
2821/* Returns true if COST is infinite.  */
2822
2823static bool
2824infinite_cost_p (comp_cost cost)
2825{
2826  return cost.cost == INFTY;
2827}
2828
/* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
   depends on invariants DEPENDS_ON, that the value used in expressing it
   is VALUE, and, in case of iv elimination, that the comparison operator
   is COMP.  */
2832
2833static void
2834set_use_iv_cost (struct ivopts_data *data,
2835		 struct iv_use *use, struct iv_cand *cand,
2836		 comp_cost cost, bitmap depends_on, tree value,
2837		 enum tree_code comp, int inv_expr_id)
2838{
2839  unsigned i, s;
2840
2841  if (infinite_cost_p (cost))
2842    {
2843      BITMAP_FREE (depends_on);
2844      return;
2845    }
2846
2847  if (data->consider_all_candidates)
2848    {
2849      use->cost_map[cand->id].cand = cand;
2850      use->cost_map[cand->id].cost = cost;
2851      use->cost_map[cand->id].depends_on = depends_on;
2852      use->cost_map[cand->id].value = value;
2853      use->cost_map[cand->id].comp = comp;
2854      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2855      return;
2856    }
2857
2858  /* n_map_members is a power of two, so this computes modulo.  */
2859  s = cand->id & (use->n_map_members - 1);
2860  for (i = s; i < use->n_map_members; i++)
2861    if (!use->cost_map[i].cand)
2862      goto found;
2863  for (i = 0; i < s; i++)
2864    if (!use->cost_map[i].cand)
2865      goto found;
2866
2867  gcc_unreachable ();
2868
2869found:
2870  use->cost_map[i].cand = cand;
2871  use->cost_map[i].cost = cost;
2872  use->cost_map[i].depends_on = depends_on;
2873  use->cost_map[i].value = value;
2874  use->cost_map[i].comp = comp;
2875  use->cost_map[i].inv_expr_id = inv_expr_id;
2876}
2877
2878/* Gets cost of (USE, CANDIDATE) pair.  */
2879
2880static struct cost_pair *
2881get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2882		 struct iv_cand *cand)
2883{
2884  unsigned i, s;
2885  struct cost_pair *ret;
2886
2887  if (!cand)
2888    return NULL;
2889
2890  if (data->consider_all_candidates)
2891    {
2892      ret = use->cost_map + cand->id;
2893      if (!ret->cand)
2894	return NULL;
2895
2896      return ret;
2897    }
2898
2899  /* n_map_members is a power of two, so this computes modulo.  */
2900  s = cand->id & (use->n_map_members - 1);
2901  for (i = s; i < use->n_map_members; i++)
2902    if (use->cost_map[i].cand == cand)
2903      return use->cost_map + i;
2904    else if (use->cost_map[i].cand == NULL)
2905      return NULL;
2906  for (i = 0; i < s; i++)
2907    if (use->cost_map[i].cand == cand)
2908      return use->cost_map + i;
2909    else if (use->cost_map[i].cand == NULL)
2910      return NULL;
2911
2912  return NULL;
2913}
2914
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

2916static rtx
2917produce_memory_decl_rtl (tree obj, int *regno)
2918{
2919  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2920  machine_mode address_mode = targetm.addr_space.address_mode (as);
2921  rtx x;
2922
2923  gcc_assert (obj);
2924  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2925    {
2926      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2927      x = gen_rtx_SYMBOL_REF (address_mode, name);
2928      SET_SYMBOL_REF_DECL (x, obj);
2929      x = gen_rtx_MEM (DECL_MODE (obj), x);
2930      set_mem_addr_space (x, as);
2931      targetm.encode_section_info (obj, x, true);
2932    }
2933  else
2934    {
2935      x = gen_raw_REG (address_mode, (*regno)++);
2936      x = gen_rtx_MEM (DECL_MODE (obj), x);
2937      set_mem_addr_space (x, as);
2938    }
2939
2940  return x;
2941}
2942
/* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */
2945
2946static tree
2947prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2948{
2949  tree obj = NULL_TREE;
2950  rtx x = NULL_RTX;
2951  int *regno = (int *) data;
2952
2953  switch (TREE_CODE (*expr_p))
2954    {
2955    case ADDR_EXPR:
2956      for (expr_p = &TREE_OPERAND (*expr_p, 0);
2957	   handled_component_p (*expr_p);
2958	   expr_p = &TREE_OPERAND (*expr_p, 0))
2959	continue;
2960      obj = *expr_p;
2961      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2962        x = produce_memory_decl_rtl (obj, regno);
2963      break;
2964
2965    case SSA_NAME:
2966      *ws = 0;
2967      obj = SSA_NAME_VAR (*expr_p);
2968      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2969      if (!obj)
2970	return NULL_TREE;
2971      if (!DECL_RTL_SET_P (obj))
2972	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2973      break;
2974
2975    case VAR_DECL:
2976    case PARM_DECL:
2977    case RESULT_DECL:
2978      *ws = 0;
2979      obj = *expr_p;
2980
2981      if (DECL_RTL_SET_P (obj))
2982	break;
2983
2984      if (DECL_MODE (obj) == BLKmode)
2985	x = produce_memory_decl_rtl (obj, regno);
2986      else
2987	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2988
2989      break;
2990
2991    default:
2992      break;
2993    }
2994
2995  if (x)
2996    {
2997      decl_rtl_to_reset.safe_push (obj);
2998      SET_DECL_RTL (obj, x);
2999    }
3000
3001  return NULL_TREE;
3002}
3003
3004/* Determines cost of the computation of EXPR.  */
3005
3006static unsigned
3007computation_cost (tree expr, bool speed)
3008{
3009  rtx_insn *seq;
3010  rtx rslt;
3011  tree type = TREE_TYPE (expr);
3012  unsigned cost;
3013  /* Avoid using hard regs in ways which may be unsupported.  */
3014  int regno = LAST_VIRTUAL_REGISTER + 1;
3015  struct cgraph_node *node = cgraph_node::get (current_function_decl);
3016  enum node_frequency real_frequency = node->frequency;
3017
3018  node->frequency = NODE_FREQUENCY_NORMAL;
3019  crtl->maybe_hot_insn_p = speed;
3020  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3021  start_sequence ();
3022  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3023  seq = get_insns ();
3024  end_sequence ();
3025  default_rtl_profile ();
3026  node->frequency = real_frequency;
3027
3028  cost = seq_cost (seq, speed);
3029  if (MEM_P (rslt))
3030    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3031			  TYPE_ADDR_SPACE (type), speed);
3032  else if (!REG_P (rslt))
3033    cost += set_src_cost (rslt, speed);
3034
3035  return cost;
3036}
3037
/* Returns the variable containing the value of candidate CAND at STMT.  */
3039
3040static tree
3041var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3042{
3043  if (stmt_after_increment (loop, cand, stmt))
3044    return cand->var_after;
3045  else
3046    return cand->var_before;
3047}
3048
3049/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3050   same precision that is at least as wide as the precision of TYPE, stores
3051   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3052   type of A and B.  */
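
/* For example (hypothetically assuming a 64-bit "long" and a 32-bit
   "unsigned int"), for A = (unsigned int) x and B = (unsigned int) y with
   x and y of type long, BA = x and BB = y are stored and "long" is
   returned.  */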
3053
3054static tree
3055determine_common_wider_type (tree *a, tree *b)
3056{
3057  tree wider_type = NULL;
3058  tree suba, subb;
3059  tree atype = TREE_TYPE (*a);
3060
3061  if (CONVERT_EXPR_P (*a))
3062    {
3063      suba = TREE_OPERAND (*a, 0);
3064      wider_type = TREE_TYPE (suba);
3065      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3066	return atype;
3067    }
3068  else
3069    return atype;
3070
3071  if (CONVERT_EXPR_P (*b))
3072    {
3073      subb = TREE_OPERAND (*b, 0);
3074      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3075	return atype;
3076    }
3077  else
3078    return atype;
3079
3080  *a = suba;
3081  *b = subb;
3082  return wider_type;
3083}
3084
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in decomposed form
   in AFF.  Returns false if USE cannot be expressed using CAND.  */
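
/* As a sketch of the computation below: if USE has the iv {base = 4,
   step = 4} and CAND has the iv {base = 0, step = 1}, then rat == 4 and
   the affine combination built is 4 - 4 * 0 + 4 * var, i.e. 4 + 4 * var.  */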
3088
3089static bool
3090get_computation_aff (struct loop *loop,
3091		     struct iv_use *use, struct iv_cand *cand, gimple at,
3092		     struct aff_tree *aff)
3093{
3094  tree ubase = use->iv->base;
3095  tree ustep = use->iv->step;
3096  tree cbase = cand->iv->base;
3097  tree cstep = cand->iv->step, cstep_common;
3098  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3099  tree common_type, var;
3100  tree uutype;
3101  aff_tree cbase_aff, var_aff;
3102  widest_int rat;
3103
3104  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3105    {
      /* We do not have enough precision to express the values of the use.  */
3107      return false;
3108    }
3109
3110  var = var_at_stmt (loop, cand, at);
3111  uutype = unsigned_type_for (utype);
3112
  /* If the conversion is not a noop, perform it.  */
3114  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3115    {
3116      cstep = fold_convert (uutype, cstep);
3117      cbase = fold_convert (uutype, cbase);
3118      var = fold_convert (uutype, var);
3119    }
3120
3121  if (!constant_multiple_of (ustep, cstep, &rat))
3122    return false;
3123
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and casting the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
3129  common_type = determine_common_wider_type (&ubase, &cbase);
3130
3131  /* use = ubase - ratio * cbase + ratio * var.  */
3132  tree_to_aff_combination (ubase, common_type, aff);
3133  tree_to_aff_combination (cbase, common_type, &cbase_aff);
3134  tree_to_aff_combination (var, uutype, &var_aff);
3135
3136  /* We need to shift the value if we are after the increment.  */
3137  if (stmt_after_increment (loop, cand, at))
3138    {
3139      aff_tree cstep_aff;
3140
3141      if (common_type != uutype)
3142	cstep_common = fold_convert (common_type, cstep);
3143      else
3144	cstep_common = cstep;
3145
3146      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3147      aff_combination_add (&cbase_aff, &cstep_aff);
3148    }
3149
3150  aff_combination_scale (&cbase_aff, -rat);
3151  aff_combination_add (aff, &cbase_aff);
3152  if (common_type != uutype)
3153    aff_combination_convert (aff, uutype);
3154
3155  aff_combination_scale (&var_aff, rat);
3156  aff_combination_add (aff, &var_aff);
3157
3158  return true;
3159}
3160
/* Returns the type of USE.  */
3162
3163static tree
3164get_use_type (struct iv_use *use)
3165{
3166  tree base_type = TREE_TYPE (use->iv->base);
3167  tree type;
3168
3169  if (use->type == USE_ADDRESS)
3170    {
3171      /* The base_type may be a void pointer.  Create a pointer type based on
3172	 the mem_ref instead.  */
3173      type = build_pointer_type (TREE_TYPE (*use->op_p));
3174      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3175		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3176    }
3177  else
3178    type = base_type;
3179
3180  return type;
3181}
3182
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */
3185
3186static tree
3187get_computation_at (struct loop *loop,
3188		    struct iv_use *use, struct iv_cand *cand, gimple at)
3189{
3190  aff_tree aff;
3191  tree type = get_use_type (use);
3192
3193  if (!get_computation_aff (loop, use, cand, at, &aff))
3194    return NULL_TREE;
3195  unshare_aff_combination (&aff);
3196  return fold_convert (type, aff_combination_to_tree (&aff));
3197}
3198
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */
3201
3202static tree
3203get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3204{
3205  return get_computation_at (loop, use, cand, use->stmt);
3206}
3207
3208/* Adjust the cost COST for being in loop setup rather than loop body.
3209   If we're optimizing for space, the loop setup overhead is constant;
3210   if we're optimizing for speed, amortize it over the per-iteration cost.  */
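
/* E.g., when optimizing for speed, a setup cost of 20 in a loop expected
   to iterate 10 times on average is accounted as 2 per iteration.  */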
3211static unsigned
3212adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3213{
3214  if (cost == INFTY)
3215    return cost;
3216  else if (optimize_loop_for_speed_p (data->current_loop))
3217    return cost / avg_loop_niter (data->current_loop);
3218  else
3219    return cost;
3220}
3221
/* Returns true if multiplying by RATIO is allowed in an address.  Tests the
   validity for a memory reference accessing memory of mode MODE in address
   space AS.  */
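
/* For instance, on x86 (an illustrative assumption) the index register of
   an address may be scaled by 1, 2, 4 or 8, so those are the ratios that
   would typically be recorded as valid here for most modes.  */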
3225
3227bool
3228multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3229				 addr_space_t as)
3230{
3231#define MAX_RATIO 128
3232  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3233  static vec<sbitmap> valid_mult_list;
3234  sbitmap valid_mult;
3235
3236  if (data_index >= valid_mult_list.length ())
3237    valid_mult_list.safe_grow_cleared (data_index + 1);
3238
3239  valid_mult = valid_mult_list[data_index];
3240  if (!valid_mult)
3241    {
3242      machine_mode address_mode = targetm.addr_space.address_mode (as);
3243      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3244      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3245      rtx addr, scaled;
3246      HOST_WIDE_INT i;
3247
3248      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3249      bitmap_clear (valid_mult);
3250      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3251      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3252      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3253	{
3254	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3255	  if (memory_address_addr_space_p (mode, addr, as)
3256	      || memory_address_addr_space_p (mode, scaled, as))
3257	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3258	}
3259
3260      if (dump_file && (dump_flags & TDF_DETAILS))
3261	{
3262	  fprintf (dump_file, "  allowed multipliers:");
3263	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3264	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3265	      fprintf (dump_file, " %d", (int) i);
3266	  fprintf (dump_file, "\n");
3267	  fprintf (dump_file, "\n");
3268	}
3269
3270      valid_mult_list[data_index] = valid_mult;
3271    }
3272
3273  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3274    return false;
3275
3276  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3277}
3278
3279/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3280   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3281   variable is omitted.  Compute the cost for a memory reference that accesses
3282   a memory location of mode MEM_MODE in address space AS.
3283
3284   MAY_AUTOINC is set to true if the autoincrement (increasing index by
3285   size of MEM_MODE / RATIO) is available.  To make this determination, we
3286   look at the size of the increment to be made, which is given in CSTEP.
3287   CSTEP may be zero if the step is unknown.
3288   STMT_AFTER_INC is true iff the statement we're looking at is after the
3289   increment of the original biv.
3290
   TODO -- there must be some better way.  This is all quite crude.  */
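
/* For instance, with SYMBOL_PRESENT, VAR_PRESENT, OFFSET == 4 and
   RATIO == 4 the shape corresponds to an address like
   "sym + 4 + var + index * 4" (roughly "sym+4(%var,%index,4)" in x86
   AT&T syntax, used here purely as an illustration), whose cost is looked
   up in the table precomputed below.  */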
3292
3293enum ainc_type
3294{
3295  AINC_PRE_INC,		/* Pre increment.  */
3296  AINC_PRE_DEC,		/* Pre decrement.  */
3297  AINC_POST_INC,	/* Post increment.  */
3298  AINC_POST_DEC,	/* Post decrement.  */
3299  AINC_NONE		/* Also the number of auto increment types.  */
3300};
3301
3302typedef struct address_cost_data_s
3303{
3304  HOST_WIDE_INT min_offset, max_offset;
3305  unsigned costs[2][2][2][2];
3306  unsigned ainc_costs[AINC_NONE];
3307} *address_cost_data;
3308
3309
3310static comp_cost
3311get_address_cost (bool symbol_present, bool var_present,
3312		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3313		  HOST_WIDE_INT cstep, machine_mode mem_mode,
3314		  addr_space_t as, bool speed,
3315		  bool stmt_after_inc, bool *may_autoinc)
3316{
3317  machine_mode address_mode = targetm.addr_space.address_mode (as);
3318  static vec<address_cost_data> address_cost_data_list;
3319  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3320  address_cost_data data;
3321  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3322  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3323  unsigned cost, acost, complexity;
3324  enum ainc_type autoinc_type;
3325  bool offset_p, ratio_p, autoinc;
3326  HOST_WIDE_INT s_offset, autoinc_offset, msize;
3327  unsigned HOST_WIDE_INT mask;
3328  unsigned bits;
3329
3330  if (data_index >= address_cost_data_list.length ())
3331    address_cost_data_list.safe_grow_cleared (data_index + 1);
3332
3333  data = address_cost_data_list[data_index];
3334  if (!data)
3335    {
3336      HOST_WIDE_INT i;
3337      HOST_WIDE_INT rat, off = 0;
3338      int old_cse_not_expected, width;
3339      unsigned sym_p, var_p, off_p, rat_p, add_c;
3340      rtx_insn *seq;
3341      rtx addr, base;
3342      rtx reg0, reg1;
3343
3344      data = (address_cost_data) xcalloc (1, sizeof (*data));
3345
3346      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3347
3348      width = GET_MODE_BITSIZE (address_mode) - 1;
3349      if (width > (HOST_BITS_PER_WIDE_INT - 1))
3350	width = HOST_BITS_PER_WIDE_INT - 1;
3351      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3352
3353      for (i = width; i >= 0; i--)
3354	{
3355	  off = -((unsigned HOST_WIDE_INT) 1 << i);
3356	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3357	  if (memory_address_addr_space_p (mem_mode, addr, as))
3358	    break;
3359	}
      data->min_offset = (i == -1 ? 0 : off);
3361
3362      for (i = width; i >= 0; i--)
3363	{
3364	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3365	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3366	  if (memory_address_addr_space_p (mem_mode, addr, as))
3367	    break;
3368	  /* For some strict-alignment targets, the offset must be naturally
3369	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
3370	  off = mem_mode != QImode
3371		? ((unsigned HOST_WIDE_INT) 1 << i)
3372		    - GET_MODE_SIZE (mem_mode)
3373		: 0;
3374	  if (off > 0)
3375	    {
3376	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
3377	      if (memory_address_addr_space_p (mem_mode, addr, as))
3378		break;
3379	    }
3380	}
3381      if (i == -1)
3382        off = 0;
3383      data->max_offset = off;
3384
3385      if (dump_file && (dump_flags & TDF_DETAILS))
3386	{
3387	  fprintf (dump_file, "get_address_cost:\n");
3388	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3389		   GET_MODE_NAME (mem_mode),
3390		   data->min_offset);
3391	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3392		   GET_MODE_NAME (mem_mode),
3393		   data->max_offset);
3394	}
3395
3396      rat = 1;
3397      for (i = 2; i <= MAX_RATIO; i++)
3398	if (multiplier_allowed_in_address_p (i, mem_mode, as))
3399	  {
3400	    rat = i;
3401	    break;
3402	  }
3403
3404      /* Compute the cost of various addressing modes.  */
3405      acost = 0;
3406      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3407      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3408
3409      if (USE_LOAD_PRE_DECREMENT (mem_mode)
3410	  || USE_STORE_PRE_DECREMENT (mem_mode))
3411	{
3412	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
3413	  has_predec[mem_mode]
3414	    = memory_address_addr_space_p (mem_mode, addr, as);
3415
3416	  if (has_predec[mem_mode])
3417	    data->ainc_costs[AINC_PRE_DEC]
3418	      = address_cost (addr, mem_mode, as, speed);
3419	}
3420      if (USE_LOAD_POST_DECREMENT (mem_mode)
3421	  || USE_STORE_POST_DECREMENT (mem_mode))
3422	{
3423	  addr = gen_rtx_POST_DEC (address_mode, reg0);
3424	  has_postdec[mem_mode]
3425	    = memory_address_addr_space_p (mem_mode, addr, as);
3426
3427	  if (has_postdec[mem_mode])
3428	    data->ainc_costs[AINC_POST_DEC]
3429	      = address_cost (addr, mem_mode, as, speed);
3430	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
3433	{
3434	  addr = gen_rtx_PRE_INC (address_mode, reg0);
3435	  has_preinc[mem_mode]
3436	    = memory_address_addr_space_p (mem_mode, addr, as);
3437
3438	  if (has_preinc[mem_mode])
3439	    data->ainc_costs[AINC_PRE_INC]
3440	      = address_cost (addr, mem_mode, as, speed);
3441	}
3442      if (USE_LOAD_POST_INCREMENT (mem_mode)
3443	  || USE_STORE_POST_INCREMENT (mem_mode))
3444	{
3445	  addr = gen_rtx_POST_INC (address_mode, reg0);
3446	  has_postinc[mem_mode]
3447	    = memory_address_addr_space_p (mem_mode, addr, as);
3448
3449	  if (has_postinc[mem_mode])
3450	    data->ainc_costs[AINC_POST_INC]
3451	      = address_cost (addr, mem_mode, as, speed);
3452	}
3453      for (i = 0; i < 16; i++)
3454	{
3455	  sym_p = i & 1;
3456	  var_p = (i >> 1) & 1;
3457	  off_p = (i >> 2) & 1;
3458	  rat_p = (i >> 3) & 1;
3459
3460	  addr = reg0;
3461	  if (rat_p)
3462	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3463				   gen_int_mode (rat, address_mode));
3464
3465	  if (var_p)
3466	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3467
3468	  if (sym_p)
3469	    {
3470	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3471	      /* ??? We can run into trouble with some backends by presenting
3472		 it with symbols which haven't been properly passed through
3473		 targetm.encode_section_info.  By setting the local bit, we
3474		 enhance the probability of things working.  */
3475	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3476
3477	      if (off_p)
3478		base = gen_rtx_fmt_e (CONST, address_mode,
3479				      gen_rtx_fmt_ee
3480					(PLUS, address_mode, base,
3481					 gen_int_mode (off, address_mode)));
3482	    }
3483	  else if (off_p)
3484	    base = gen_int_mode (off, address_mode);
3485	  else
3486	    base = NULL_RTX;
3487
3488	  if (base)
3489	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3490
3491	  start_sequence ();
3492	  /* To avoid splitting addressing modes, pretend that no cse will
3493	     follow.  */
3494	  old_cse_not_expected = cse_not_expected;
3495	  cse_not_expected = true;
3496	  addr = memory_address_addr_space (mem_mode, addr, as);
3497	  cse_not_expected = old_cse_not_expected;
3498	  seq = get_insns ();
3499	  end_sequence ();
3500
3501	  acost = seq_cost (seq, speed);
3502	  acost += address_cost (addr, mem_mode, as, speed);
3503
3504	  if (!acost)
3505	    acost = 1;
3506	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
3507	}
3508
      /* On some targets, it is quite expensive to load a symbol into a
	 register, which makes addresses that contain symbols look much more
	 expensive.  However, the symbol will have to be loaded in any case
	 before the loop (and quite likely we have it in a register already),
	 so it does not make much sense to penalize them too heavily.  Hence
	 we make some final tweaks for the SYMBOL_PRESENT modes:
3515
	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with a small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
	 and if this is the case, use it.  */
3521      add_c = add_cost (speed, address_mode);
3522      for (i = 0; i < 8; i++)
3523	{
3524	  var_p = i & 1;
3525	  off_p = (i >> 1) & 1;
3526	  rat_p = (i >> 2) & 1;
3527
3528	  acost = data->costs[0][1][off_p][rat_p] + 1;
3529	  if (var_p)
3530	    acost += add_c;
3531
3532	  if (acost < data->costs[1][var_p][off_p][rat_p])
3533	    data->costs[1][var_p][off_p][rat_p] = acost;
3534	}
3535
3536      if (dump_file && (dump_flags & TDF_DETAILS))
3537	{
3538	  fprintf (dump_file, "Address costs:\n");
3539
3540	  for (i = 0; i < 16; i++)
3541	    {
3542	      sym_p = i & 1;
3543	      var_p = (i >> 1) & 1;
3544	      off_p = (i >> 2) & 1;
3545	      rat_p = (i >> 3) & 1;
3546
3547	      fprintf (dump_file, "  ");
3548	      if (sym_p)
3549		fprintf (dump_file, "sym + ");
3550	      if (var_p)
3551		fprintf (dump_file, "var + ");
3552	      if (off_p)
3553		fprintf (dump_file, "cst + ");
3554	      if (rat_p)
3555		fprintf (dump_file, "rat * ");
3556
3557	      acost = data->costs[sym_p][var_p][off_p][rat_p];
3558	      fprintf (dump_file, "index costs %d\n", acost);
3559	    }
3560	  if (has_predec[mem_mode] || has_postdec[mem_mode]
3561	      || has_preinc[mem_mode] || has_postinc[mem_mode])
3562	    fprintf (dump_file, "  May include autoinc/dec\n");
3563	  fprintf (dump_file, "\n");
3564	}
3565
3566      address_cost_data_list[data_index] = data;
3567    }
3568
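  /* Sign-extend OFFSET from the precision of the address mode, so that
     negative offsets are handled correctly.  */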
3569  bits = GET_MODE_BITSIZE (address_mode);
3570  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3571  offset &= mask;
3572  if ((offset >> (bits - 1) & 1))
3573    offset |= ~mask;
3574  s_offset = offset;
3575
3576  autoinc = false;
3577  autoinc_type = AINC_NONE;
3578  msize = GET_MODE_SIZE (mem_mode);
3579  autoinc_offset = offset;
3580  if (stmt_after_inc)
3581    autoinc_offset += ratio * cstep;
3582  if (symbol_present || var_present || ratio != 1)
3583    autoinc = false;
3584  else
3585    {
3586      if (has_postinc[mem_mode] && autoinc_offset == 0
3587	  && msize == cstep)
3588	autoinc_type = AINC_POST_INC;
3589      else if (has_postdec[mem_mode] && autoinc_offset == 0
3590	       && msize == -cstep)
3591	autoinc_type = AINC_POST_DEC;
3592      else if (has_preinc[mem_mode] && autoinc_offset == msize
3593	       && msize == cstep)
3594	autoinc_type = AINC_PRE_INC;
3595      else if (has_predec[mem_mode] && autoinc_offset == -msize
3596	       && msize == -cstep)
3597	autoinc_type = AINC_PRE_DEC;
3598
3599      if (autoinc_type != AINC_NONE)
3600	autoinc = true;
3601    }
3602
3603  cost = 0;
3604  offset_p = (s_offset != 0
3605	      && data->min_offset <= s_offset
3606	      && s_offset <= data->max_offset);
3607  ratio_p = (ratio != 1
3608	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3609
3610  if (ratio != 1 && !ratio_p)
3611    cost += mult_by_coeff_cost (ratio, address_mode, speed);
3612
3613  if (s_offset && !offset_p && !symbol_present)
3614    cost += add_cost (speed, address_mode);
3615
3616  if (may_autoinc)
3617    *may_autoinc = autoinc;
3618  if (autoinc)
3619    acost = data->ainc_costs[autoinc_type];
3620  else
3621    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3622  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3623  return new_cost (cost + acost, complexity);
3624}
3625
/* Calculates the speed or size cost (per SPEED) of the shiftadd EXPR in MODE.
   MULT is the EXPR operand holding the multiplication that can become a
   shift.  COST0 and COST1 are the costs for calculating the operands of EXPR.
   Returns true if successful, and returns the cost in *COST.  */
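
/* E.g., for EXPR = a + b * 4 (a hypothetical input), MULT is "b * 4" and
   m == 2, so on a target with a cheap shift-and-add instruction sa_cost
   is preferred over a separate shift followed by an add.  */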
3630
3631static bool
3632get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3633                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3634{
3635  comp_cost res;
3636  tree op1 = TREE_OPERAND (expr, 1);
3637  tree cst = TREE_OPERAND (mult, 1);
3638  tree multop = TREE_OPERAND (mult, 0);
3639  int m = exact_log2 (int_cst_value (cst));
3640  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3641  int as_cost, sa_cost;
3642  bool mult_in_op1;
3643
3644  if (!(m >= 0 && m < maxm))
3645    return false;
3646
3647  mult_in_op1 = operand_equal_p (op1, mult, 0);
3648
3649  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3650
3651  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3652     use that in preference to a shift insn followed by an add insn.  */
3653  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3654             ? shiftadd_cost (speed, mode, m)
3655             : (mult_in_op1
3656                ? shiftsub1_cost (speed, mode, m)
3657                : shiftsub0_cost (speed, mode, m)));
3658
3659  res = new_cost (MIN (as_cost, sa_cost), 0);
3660  res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3661
3662  STRIP_NOPS (multop);
3663  if (!is_gimple_val (multop))
3664    res = add_costs (res, force_expr_to_var_cost (multop, speed));
3665
3666  *cost = res;
3667  return true;
3668}
3669
3670/* Estimates cost of forcing expression EXPR into a variable.  */
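
/* For instance (summarizing the cases handled below), an SSA name is free,
   an integer constant costs integer_cost[speed], and "a + b" costs an
   addition plus the costs of forcing a and b, bounded overall by
   target_spill_cost[speed].  */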
3671
3672static comp_cost
3673force_expr_to_var_cost (tree expr, bool speed)
3674{
3675  static bool costs_initialized = false;
3676  static unsigned integer_cost [2];
3677  static unsigned symbol_cost [2];
3678  static unsigned address_cost [2];
3679  tree op0, op1;
3680  comp_cost cost0, cost1, cost;
3681  machine_mode mode;
3682
3683  if (!costs_initialized)
3684    {
3685      tree type = build_pointer_type (integer_type_node);
3686      tree var, addr;
3687      rtx x;
3688      int i;
3689
3690      var = create_tmp_var_raw (integer_type_node, "test_var");
3691      TREE_STATIC (var) = 1;
3692      x = produce_memory_decl_rtl (var, NULL);
3693      SET_DECL_RTL (var, x);
3694
3695      addr = build1 (ADDR_EXPR, type, var);
3696
3698      for (i = 0; i < 2; i++)
3699	{
3700	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3701							     2000), i);
3702
3703	  symbol_cost[i] = computation_cost (addr, i) + 1;
3704
3705	  address_cost[i]
3706	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3707	  if (dump_file && (dump_flags & TDF_DETAILS))
3708	    {
3709	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3710	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3711	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3712	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3713	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3714	      fprintf (dump_file, "\n");
3715	    }
3716	}
3717
3718      costs_initialized = true;
3719    }
3720
3721  STRIP_NOPS (expr);
3722
3723  if (SSA_VAR_P (expr))
3724    return no_cost;
3725
3726  if (is_gimple_min_invariant (expr))
3727    {
3728      if (TREE_CODE (expr) == INTEGER_CST)
3729	return new_cost (integer_cost [speed], 0);
3730
3731      if (TREE_CODE (expr) == ADDR_EXPR)
3732	{
3733	  tree obj = TREE_OPERAND (expr, 0);
3734
3735	  if (TREE_CODE (obj) == VAR_DECL
3736	      || TREE_CODE (obj) == PARM_DECL
3737	      || TREE_CODE (obj) == RESULT_DECL)
3738	    return new_cost (symbol_cost [speed], 0);
3739	}
3740
3741      return new_cost (address_cost [speed], 0);
3742    }
3743
3744  switch (TREE_CODE (expr))
3745    {
3746    case POINTER_PLUS_EXPR:
3747    case PLUS_EXPR:
3748    case MINUS_EXPR:
3749    case MULT_EXPR:
3750      op0 = TREE_OPERAND (expr, 0);
3751      op1 = TREE_OPERAND (expr, 1);
3752      STRIP_NOPS (op0);
3753      STRIP_NOPS (op1);
3754      break;
3755
3756    CASE_CONVERT:
3757    case NEGATE_EXPR:
3758      op0 = TREE_OPERAND (expr, 0);
3759      STRIP_NOPS (op0);
3760      op1 = NULL_TREE;
3761      break;
3762
3763    default:
3764      /* Just an arbitrary value, FIXME.  */
3765      return new_cost (target_spill_cost[speed], 0);
3766    }
3767
3768  if (op0 == NULL_TREE
3769      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3770    cost0 = no_cost;
3771  else
3772    cost0 = force_expr_to_var_cost (op0, speed);
3773
3774  if (op1 == NULL_TREE
3775      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3776    cost1 = no_cost;
3777  else
3778    cost1 = force_expr_to_var_cost (op1, speed);
3779
3780  mode = TYPE_MODE (TREE_TYPE (expr));
3781  switch (TREE_CODE (expr))
3782    {
3783    case POINTER_PLUS_EXPR:
3784    case PLUS_EXPR:
3785    case MINUS_EXPR:
3786    case NEGATE_EXPR:
3787      cost = new_cost (add_cost (speed, mode), 0);
3788      if (TREE_CODE (expr) != NEGATE_EXPR)
3789        {
3790          tree mult = NULL_TREE;
3791          comp_cost sa_cost;
3792          if (TREE_CODE (op1) == MULT_EXPR)
3793            mult = op1;
3794          else if (TREE_CODE (op0) == MULT_EXPR)
3795            mult = op0;
3796
3797          if (mult != NULL_TREE
3798              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3799              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3800                                    speed, &sa_cost))
3801            return sa_cost;
3802        }
3803      break;
3804
3805    CASE_CONVERT:
3806      {
	tree inner_type, outer_type;
	outer_type = TREE_TYPE (expr);
	inner_type = TREE_TYPE (op0);
	cost = new_cost (convert_cost (TYPE_MODE (outer_type),
				       TYPE_MODE (inner_type), speed), 0);
3812      }
3813      break;
3814
3815    case MULT_EXPR:
3816      if (cst_and_fits_in_hwi (op0))
3817	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3818					     mode, speed), 0);
3819      else if (cst_and_fits_in_hwi (op1))
3820	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3821					     mode, speed), 0);
3822      else
3823	return new_cost (target_spill_cost [speed], 0);
3824      break;
3825
3826    default:
3827      gcc_unreachable ();
3828    }
3829
3830  cost = add_costs (cost, cost0);
3831  cost = add_costs (cost, cost1);
3832
  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations are often either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
3837  if (cost.cost > (int) target_spill_cost [speed])
3838    cost.cost = target_spill_cost [speed];
3839
3840  return cost;
3841}
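
/* For instance, with the costs initialized above, the constant 42 is
   costed as integer_cost[speed], &static_var as symbol_cost[speed] and
   an invariant address such as &a[4] as address_cost[speed]; a plain
   SSA name or declaration is free, and unhandled expressions fall back
   to target_spill_cost.  */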
3842
3843/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3844   invariants the computation depends on.  */
3845
3846static comp_cost
3847force_var_cost (struct ivopts_data *data,
3848		tree expr, bitmap *depends_on)
3849{
3850  if (depends_on)
3851    {
3852      fd_ivopts_data = data;
3853      walk_tree (&expr, find_depends, depends_on, NULL);
3854    }
3855
3856  return force_expr_to_var_cost (expr, data->speed);
3857}
3858
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
   set to false if the corresponding part is missing.  DEPENDS_ON is a set of
   the invariants the computation depends on.  */
3863
3864static comp_cost
3865split_address_cost (struct ivopts_data *data,
3866		    tree addr, bool *symbol_present, bool *var_present,
3867		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3868{
3869  tree core;
3870  HOST_WIDE_INT bitsize;
3871  HOST_WIDE_INT bitpos;
3872  tree toffset;
3873  machine_mode mode;
3874  int unsignedp, volatilep;
3875
3876  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3877			      &unsignedp, &volatilep, false);
3878
3879  if (toffset != 0
3880      || bitpos % BITS_PER_UNIT != 0
3881      || TREE_CODE (core) != VAR_DECL)
3882    {
3883      *symbol_present = false;
3884      *var_present = true;
3885      fd_ivopts_data = data;
3886      walk_tree (&addr, find_depends, depends_on, NULL);
3887      return new_cost (target_spill_cost[data->speed], 0);
3888    }
3889
3890  *offset += bitpos / BITS_PER_UNIT;
3891  if (TREE_STATIC (core)
3892      || DECL_EXTERNAL (core))
3893    {
3894      *symbol_present = true;
3895      *var_present = false;
3896      return no_cost;
3897    }
3898
3899  *symbol_present = false;
3900  *var_present = true;
3901  return no_cost;
3902}
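
/* E.g. when the address is &g.f for a file-scope (TREE_STATIC) struct
   g, get_inner_reference returns the VAR_DECL g as CORE, the byte
   offset of the field f goes into OFFSET, and the whole address is
   expressible as symbol + offset at no extra cost.  */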
3903
3904/* Estimates cost of expressing difference of addresses E1 - E2 as
3905   var + symbol + offset.  The value of offset is added to OFFSET,
3906   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3907   part is missing.  DEPENDS_ON is a set of the invariants the computation
3908   depends on.  */
3909
3910static comp_cost
3911ptr_difference_cost (struct ivopts_data *data,
3912		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3913		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3914{
3915  HOST_WIDE_INT diff = 0;
3916  aff_tree aff_e1, aff_e2;
3917  tree type;
3918
3919  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3920
3921  if (ptr_difference_const (e1, e2, &diff))
3922    {
3923      *offset += diff;
3924      *symbol_present = false;
3925      *var_present = false;
3926      return no_cost;
3927    }
3928
3929  if (integer_zerop (e2))
3930    return split_address_cost (data, TREE_OPERAND (e1, 0),
3931			       symbol_present, var_present, offset, depends_on);
3932
3933  *symbol_present = false;
3934  *var_present = true;
3935
3936  type = signed_type_for (TREE_TYPE (e1));
3937  tree_to_aff_combination (e1, type, &aff_e1);
3938  tree_to_aff_combination (e2, type, &aff_e2);
3939  aff_combination_scale (&aff_e2, -1);
3940  aff_combination_add (&aff_e1, &aff_e2);
3941
3942  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3943}
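
/* E.g. the difference &a[6] - &a[2] is folded by ptr_difference_const
   into the constant 4 * sizeof (a[0]), which goes entirely into OFFSET
   and costs nothing; only genuinely symbolic differences reach
   force_var_cost above.  */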
3944
3945/* Estimates cost of expressing difference E1 - E2 as
3946   var + symbol + offset.  The value of offset is added to OFFSET,
3947   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3948   part is missing.  DEPENDS_ON is a set of the invariants the computation
3949   depends on.  */
3950
3951static comp_cost
3952difference_cost (struct ivopts_data *data,
3953		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3954		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3955{
3956  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3957  unsigned HOST_WIDE_INT off1, off2;
3958  aff_tree aff_e1, aff_e2;
3959  tree type;
3960
3961  e1 = strip_offset (e1, &off1);
3962  e2 = strip_offset (e2, &off2);
3963  *offset += off1 - off2;
3964
3965  STRIP_NOPS (e1);
3966  STRIP_NOPS (e2);
3967
3968  if (TREE_CODE (e1) == ADDR_EXPR)
3969    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3970				offset, depends_on);
3971  *symbol_present = false;
3972
3973  if (operand_equal_p (e1, e2, 0))
3974    {
3975      *var_present = false;
3976      return no_cost;
3977    }
3978
3979  *var_present = true;
3980
3981  if (integer_zerop (e2))
3982    return force_var_cost (data, e1, depends_on);
3983
3984  if (integer_zerop (e1))
3985    {
3986      comp_cost cost = force_var_cost (data, e2, depends_on);
3987      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3988      return cost;
3989    }
3990
3991  type = signed_type_for (TREE_TYPE (e1));
3992  tree_to_aff_combination (e1, type, &aff_e1);
3993  tree_to_aff_combination (e2, type, &aff_e2);
3994  aff_combination_scale (&aff_e2, -1);
3995  aff_combination_add (&aff_e1, &aff_e2);
3996
3997  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3998}
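
/* E.g. for E1 = p + 16 and E2 = p, strip_offset moves the 16 into
   OFFSET, the remaining operands compare equal, and the difference is
   free; for unrelated E1 and E2 the cost of computing E1 - E2 as an
   affine combination is returned.  */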
3999
4000/* Returns true if AFF1 and AFF2 are identical.  */
4001
4002static bool
4003compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4004{
4005  unsigned i;
4006
4007  if (aff1->n != aff2->n)
4008    return false;
4009
4010  for (i = 0; i < aff1->n; i++)
4011    {
4012      if (aff1->elts[i].coef != aff2->elts[i].coef)
4013        return false;
4014
4015      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4016        return false;
4017    }
4018  return true;
4019}
4020
4021/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4022
4023static int
4024get_expr_id (struct ivopts_data *data, tree expr)
4025{
4026  struct iv_inv_expr_ent ent;
4027  struct iv_inv_expr_ent **slot;
4028
4029  ent.expr = expr;
4030  ent.hash = iterative_hash_expr (expr, 0);
4031  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4032  if (*slot)
4033    return (*slot)->id;
4034
4035  *slot = XNEW (struct iv_inv_expr_ent);
4036  (*slot)->expr = expr;
4037  (*slot)->hash = ent.hash;
4038  (*slot)->id = data->inv_expr_id++;
4039  return (*slot)->id;
4040}
4041
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler-generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */
4046
4047static int
4048get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4049                            tree cbase, HOST_WIDE_INT ratio,
4050                            bool address_p)
4051{
4052  aff_tree ubase_aff, cbase_aff;
4053  tree expr, ub, cb;
4054
4055  STRIP_NOPS (ubase);
4056  STRIP_NOPS (cbase);
4057  ub = ubase;
4058  cb = cbase;
4059
4060  if ((TREE_CODE (ubase) == INTEGER_CST)
4061      && (TREE_CODE (cbase) == INTEGER_CST))
4062    return -1;
4063
  /* Strip the constant part.  */
4065  if (TREE_CODE (ubase) == PLUS_EXPR
4066      || TREE_CODE (ubase) == MINUS_EXPR
4067      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4068    {
4069      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4070        ubase = TREE_OPERAND (ubase, 0);
4071    }
4072
  /* Strip the constant part.  */
4074  if (TREE_CODE (cbase) == PLUS_EXPR
4075      || TREE_CODE (cbase) == MINUS_EXPR
4076      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4077    {
4078      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4079        cbase = TREE_OPERAND (cbase, 0);
4080    }
4081
4082  if (address_p)
4083    {
4084      if (((TREE_CODE (ubase) == SSA_NAME)
4085           || (TREE_CODE (ubase) == ADDR_EXPR
4086               && is_gimple_min_invariant (ubase)))
4087          && (TREE_CODE (cbase) == INTEGER_CST))
4088        return -1;
4089
4090      if (((TREE_CODE (cbase) == SSA_NAME)
4091           || (TREE_CODE (cbase) == ADDR_EXPR
4092               && is_gimple_min_invariant (cbase)))
4093          && (TREE_CODE (ubase) == INTEGER_CST))
4094        return -1;
4095    }
4096
4097  if (ratio == 1)
4098    {
4099      if (operand_equal_p (ubase, cbase, 0))
4100        return -1;
4101
4102      if (TREE_CODE (ubase) == ADDR_EXPR
4103          && TREE_CODE (cbase) == ADDR_EXPR)
4104        {
4105          tree usym, csym;
4106
4107          usym = TREE_OPERAND (ubase, 0);
4108          csym = TREE_OPERAND (cbase, 0);
4109          if (TREE_CODE (usym) == ARRAY_REF)
4110            {
4111              tree ind = TREE_OPERAND (usym, 1);
4112              if (TREE_CODE (ind) == INTEGER_CST
4113                  && tree_fits_shwi_p (ind)
4114                  && tree_to_shwi (ind) == 0)
4115                usym = TREE_OPERAND (usym, 0);
4116            }
4117          if (TREE_CODE (csym) == ARRAY_REF)
4118            {
4119              tree ind = TREE_OPERAND (csym, 1);
4120              if (TREE_CODE (ind) == INTEGER_CST
4121                  && tree_fits_shwi_p (ind)
4122                  && tree_to_shwi (ind) == 0)
4123                csym = TREE_OPERAND (csym, 0);
4124            }
4125          if (operand_equal_p (usym, csym, 0))
4126            return -1;
4127        }
      /* Now do a more complex comparison.  */
4129      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4130      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4131      if (compare_aff_trees (&ubase_aff, &cbase_aff))
4132        return -1;
4133    }
4134
4135  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4136  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4137
4138  aff_combination_scale (&cbase_aff, -1 * ratio);
4139  aff_combination_add (&ubase_aff, &cbase_aff);
4140  expr = aff_combination_to_tree (&ubase_aff);
4141  return get_expr_id (data, expr);
4142}
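
/* E.g. with UBASE = &a[0] + 8, CBASE = &a[0] and RATIO = 1, the
   constant parts are stripped, the remaining bases compare equal and
   -1 is returned: expressing the use needs no new temporary, only the
   constant offset.  */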
4143
/* Determines the cost of the computation with which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */
4153
4154static comp_cost
4155get_computation_cost_at (struct ivopts_data *data,
4156			 struct iv_use *use, struct iv_cand *cand,
4157			 bool address_p, bitmap *depends_on, gimple at,
4158			 bool *can_autoinc,
4159                         int *inv_expr_id)
4160{
4161  tree ubase = use->iv->base, ustep = use->iv->step;
4162  tree cbase, cstep;
4163  tree utype = TREE_TYPE (ubase), ctype;
4164  unsigned HOST_WIDE_INT cstepi, offset = 0;
4165  HOST_WIDE_INT ratio, aratio;
4166  bool var_present, symbol_present, stmt_is_after_inc;
4167  comp_cost cost;
4168  widest_int rat;
4169  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4170  machine_mode mem_mode = (address_p
4171				? TYPE_MODE (TREE_TYPE (*use->op_p))
4172				: VOIDmode);
4173
4174  *depends_on = NULL;
4175
4176  /* Only consider real candidates.  */
4177  if (!cand->iv)
4178    return infinite_cost;
4179
4180  cbase = cand->iv->base;
4181  cstep = cand->iv->step;
4182  ctype = TREE_TYPE (cbase);
4183
4184  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4185    {
      /* We do not have enough precision to express the values of the use.  */
4187      return infinite_cost;
4188    }
4189
4190  if (address_p
4191      || (use->iv->base_object
4192	  && cand->iv->base_object
4193	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4194	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4195    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (which does not expect this to happen, as it
	 is invalid in C), and would be unlikely to be useful anyway.  */
4201      if (use->iv->base_object
4202	  && cand->iv->base_object
4203	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4204	return infinite_cost;
4205    }
4206
4207  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4208    {
4209      /* TODO -- add direct handling of this case.  */
4210      goto fallback;
4211    }
4212
  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses the value of the variable before the increment anyway,
     so it is not too unrealistic.  */
4219  if (cst_and_fits_in_hwi (cstep))
4220    cstepi = int_cst_value (cstep);
4221  else
4222    cstepi = 0;
4223
4224  if (!constant_multiple_of (ustep, cstep, &rat))
4225    return infinite_cost;
4226
4227  if (wi::fits_shwi_p (rat))
4228    ratio = rat.to_shwi ();
4229  else
4230    return infinite_cost;
4231
4232  STRIP_NOPS (cbase);
4233  ctype = TREE_TYPE (cbase);
4234
4235  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4236
  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */
4243
4244  if (cst_and_fits_in_hwi (cbase))
4245    {
4246      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4247      cost = difference_cost (data,
4248			      ubase, build_int_cst (utype, 0),
4249			      &symbol_present, &var_present, &offset,
4250			      depends_on);
4251      cost.cost /= avg_loop_niter (data->current_loop);
4252    }
4253  else if (ratio == 1)
4254    {
4255      tree real_cbase = cbase;
4256
4257      /* Check to see if any adjustment is needed.  */
4258      if (cstepi == 0 && stmt_is_after_inc)
4259        {
4260          aff_tree real_cbase_aff;
4261          aff_tree cstep_aff;
4262
4263          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4264                                   &real_cbase_aff);
4265          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4266
4267          aff_combination_add (&real_cbase_aff, &cstep_aff);
4268          real_cbase = aff_combination_to_tree (&real_cbase_aff);
4269        }
4270
4271      cost = difference_cost (data,
4272			      ubase, real_cbase,
4273			      &symbol_present, &var_present, &offset,
4274			      depends_on);
4275      cost.cost /= avg_loop_niter (data->current_loop);
4276    }
4277  else if (address_p
4278	   && !POINTER_TYPE_P (ctype)
4279	   && multiplier_allowed_in_address_p
4280		(ratio, mem_mode,
4281			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4282    {
4283      cbase
4284	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4285      cost = difference_cost (data,
4286			      ubase, cbase,
4287			      &symbol_present, &var_present, &offset,
4288			      depends_on);
4289      cost.cost /= avg_loop_niter (data->current_loop);
4290    }
4291  else
4292    {
4293      cost = force_var_cost (data, cbase, depends_on);
4294      cost = add_costs (cost,
4295			difference_cost (data,
4296					 ubase, build_int_cst (utype, 0),
4297					 &symbol_present, &var_present,
4298					 &offset, depends_on));
4299      cost.cost /= avg_loop_niter (data->current_loop);
4300      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4301    }
4302
4303  if (inv_expr_id)
4304    {
4305      *inv_expr_id =
4306          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends_on.  */
4308      if (*inv_expr_id != -1 && depends_on && *depends_on)
4309        bitmap_clear (*depends_on);
4310    }
4311
4312  /* If we are after the increment, the value of the candidate is higher by
4313     one iteration.  */
4314  if (stmt_is_after_inc)
4315    offset -= ratio * cstepi;
4316
4317  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4318     (symbol/var1/const parts may be omitted).  If we are looking for an
4319     address, find the cost of addressing this.  */
4320  if (address_p)
4321    return add_costs (cost,
4322		      get_address_cost (symbol_present, var_present,
4323					offset, ratio, cstepi,
4324					mem_mode,
4325					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4326					speed, stmt_is_after_inc,
4327					can_autoinc));
4328
4329  /* Otherwise estimate the costs for computing the expression.  */
4330  if (!symbol_present && !var_present && !offset)
4331    {
4332      if (ratio != 1)
4333	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4334      return cost;
4335    }
4336
4337  /* Symbol + offset should be compile-time computable so consider that they
4338      are added once to the variable, if present.  */
4339  if (var_present && (symbol_present || offset))
4340    cost.cost += adjust_setup_cost (data,
4341				    add_cost (speed, TYPE_MODE (ctype)));
4342
4343  /* Having offset does not affect runtime cost in case it is added to
4344     symbol, but it increases complexity.  */
4345  if (offset)
4346    cost.complexity++;
4347
4348  cost.cost += add_cost (speed, TYPE_MODE (ctype));
4349
4350  aratio = ratio > 0 ? ratio : -ratio;
4351  if (aratio != 1)
4352    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4353  return cost;
4354
4355fallback:
4356  if (can_autoinc)
4357    *can_autoinc = false;
4358
4359  {
4360    /* Just get the expression, expand it and measure the cost.  */
4361    tree comp = get_computation_at (data->current_loop, use, cand, at);
4362
4363    if (!comp)
4364      return infinite_cost;
4365
4366    if (address_p)
4367      comp = build_simple_mem_ref (comp);
4368
4369    return new_cost (computation_cost (comp, speed), 0);
4370  }
4371}
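
/* As a worked example, consider an address use with base &a[0] and
   step 4 and a candidate with the same base and step.  Then RATIO is 1,
   the difference of the bases folds to zero, and the result is just
   get_address_cost for a simple [reg] addressing mode.  */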
4372
/* Determines the cost of the computation with which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */
4379
4380static comp_cost
4381get_computation_cost (struct ivopts_data *data,
4382		      struct iv_use *use, struct iv_cand *cand,
4383		      bool address_p, bitmap *depends_on,
4384                      bool *can_autoinc, int *inv_expr_id)
4385{
4386  return get_computation_cost_at (data,
4387				  use, cand, address_p, depends_on, use->stmt,
4388				  can_autoinc, inv_expr_id);
4389}
4390
4391/* Determines cost of basing replacement of USE on CAND in a generic
4392   expression.  */
4393
4394static bool
4395determine_use_iv_cost_generic (struct ivopts_data *data,
4396			       struct iv_use *use, struct iv_cand *cand)
4397{
4398  bitmap depends_on;
4399  comp_cost cost;
4400  int inv_expr_id = -1;
4401
  /* The simple case first -- if we need to express the value of the
     preserved original biv, the cost is 0.  This also prevents us from
     counting the cost of the increment twice -- once at this use and
     once in the cost of the candidate.  */
4406  if (cand->pos == IP_ORIGINAL
4407      && cand->incremented_at == use->stmt)
4408    {
4409      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4410                       ERROR_MARK, -1);
4411      return true;
4412    }
4413
4414  cost = get_computation_cost (data, use, cand, false, &depends_on,
4415                               NULL, &inv_expr_id);
4416
4417  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4418                   inv_expr_id);
4419
4420  return !infinite_cost_p (cost);
4421}
4422
4423/* Determines cost of basing replacement of USE on CAND in an address.  */
4424
4425static bool
4426determine_use_iv_cost_address (struct ivopts_data *data,
4427			       struct iv_use *use, struct iv_cand *cand)
4428{
4429  bitmap depends_on;
4430  bool can_autoinc;
4431  int inv_expr_id = -1;
4432  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4433					 &can_autoinc, &inv_expr_id);
4434
4435  if (cand->ainc_use == use)
4436    {
4437      if (can_autoinc)
4438	cost.cost -= cand->cost_step;
4439      /* If we generated the candidate solely for exploiting autoincrement
4440	 opportunities, and it turns out it can't be used, set the cost to
4441	 infinity to make sure we ignore it.  */
4442      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4443	cost = infinite_cost;
4444    }
4445  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4446                   inv_expr_id);
4447
4448  return !infinite_cost_p (cost);
4449}
4450
4451/* Computes value of candidate CAND at position AT in iteration NITER, and
4452   stores it to VAL.  */
4453
4454static void
4455cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4456	       aff_tree *val)
4457{
4458  aff_tree step, delta, nit;
4459  struct iv *iv = cand->iv;
4460  tree type = TREE_TYPE (iv->base);
  tree steptype = unsigned_type_for (type);
4465
4466  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4467  aff_combination_convert (&step, steptype);
4468  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4469  aff_combination_convert (&nit, steptype);
4470  aff_combination_mult (&nit, &step, &delta);
4471  if (stmt_after_increment (loop, cand, at))
4472    aff_combination_add (&delta, &step);
4473
4474  tree_to_aff_combination (iv->base, type, val);
4475  if (!POINTER_TYPE_P (type))
4476    aff_combination_convert (val, steptype);
4477  aff_combination_add (val, &delta);
4478}
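
/* In other words, the value computed above is

     VAL = BASE + NITER * STEP

   with one more STEP added when AT is after the candidate's increment;
   the arithmetic is carried out in the unsigned variant of the type to
   avoid overflow issues.  */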
4479
/* Returns the period of induction variable IV.  */
4481
4482static tree
4483iv_period (struct iv *iv)
4484{
4485  tree step = iv->step, period, type;
4486  tree pow2div;
4487
4488  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4489
4490  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step), so the
     final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4499  pow2div = num_ending_zeros (step);
4500
4501  period = build_low_bits_mask (type,
4502                                (TYPE_PRECISION (type)
4503                                 - tree_to_uhwi (pow2div)));
4504
4505  return period;
4506}
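
/* E.g. for a 32-bit iv with step 12 (two trailing zero bits in the
   step), the period computed above is (1 << 30) - 1: only after that
   many further increments can the iv assume the same value again.  */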
4507
4508/* Returns the comparison operator used when eliminating the iv USE.  */
4509
4510static enum tree_code
4511iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4512{
4513  struct loop *loop = data->current_loop;
4514  basic_block ex_bb;
4515  edge exit;
4516
4517  ex_bb = gimple_bb (use->stmt);
4518  exit = EDGE_SUCC (ex_bb, 0);
4519  if (flow_bb_inside_loop_p (loop, exit->dest))
4520    exit = EDGE_SUCC (ex_bb, 1);
4521
4522  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4523}
4524
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.
4528
4529   TODO: More generally, we could test for the situation that
4530	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4531	 This would require knowing the sign of OFFSET.  */
4532
4533static bool
4534difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4535{
4536  enum tree_code code;
4537  tree e1, e2;
4538  aff_tree aff_e1, aff_e2, aff_offset;
4539
4540  if (!nowrap_type_p (TREE_TYPE (base)))
4541    return false;
4542
4543  base = expand_simple_operations (base);
4544
4545  if (TREE_CODE (base) == SSA_NAME)
4546    {
4547      gimple stmt = SSA_NAME_DEF_STMT (base);
4548
4549      if (gimple_code (stmt) != GIMPLE_ASSIGN)
4550	return false;
4551
4552      code = gimple_assign_rhs_code (stmt);
4553      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4554	return false;
4555
4556      e1 = gimple_assign_rhs1 (stmt);
4557      e2 = gimple_assign_rhs2 (stmt);
4558    }
4559  else
4560    {
4561      code = TREE_CODE (base);
4562      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4563	return false;
4564      e1 = TREE_OPERAND (base, 0);
4565      e2 = TREE_OPERAND (base, 1);
4566    }
4567
4568  /* Use affine expansion as deeper inspection to prove the equality.  */
4569  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4570				  &aff_e2, &data->name_expansion_cache);
4571  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4572				  &aff_offset, &data->name_expansion_cache);
4573  aff_combination_scale (&aff_offset, -1);
4574  switch (code)
4575    {
4576    case PLUS_EXPR:
4577      aff_combination_add (&aff_e2, &aff_offset);
4578      if (aff_combination_zero_p (&aff_e2))
4579	return true;
4580
4581      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4582				      &aff_e1, &data->name_expansion_cache);
4583      aff_combination_add (&aff_e1, &aff_offset);
4584      return aff_combination_zero_p (&aff_e1);
4585
4586    case POINTER_PLUS_EXPR:
4587      aff_combination_add (&aff_e2, &aff_offset);
4588      return aff_combination_zero_p (&aff_e2);
4589
4590    default:
4591      return false;
4592    }
4593}
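
/* E.g. if BASE is defined as base = s + off in a non-wrapping type and
   OFFSET is off, the affine expansion of e2 - OFFSET above is zero, so
   BASE - OFFSET == s is known not to overflow.  */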
4594
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.
4598
4599   We aim to handle the following situation:
4600
4601   sometype *base, *p;
4602   int a, b, i;
4603
4604   i = a;
4605   p = p_0 = base + a;
4606
4607   do
4608     {
4609       bla (*p);
4610       p++;
4611       i++;
4612     }
4613   while (i < b);
4614
4615   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4616   We aim to optimize this to
4617
4618   p = p_0 = base + a;
4619   do
4620     {
4621       bla (*p);
4622       p++;
4623     }
4624   while (p < p_0 - a + b);
4625
   This preserves correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */
4633
4634static bool
4635iv_elimination_compare_lt (struct ivopts_data *data,
4636                           struct iv_cand *cand, enum tree_code *comp_p,
4637			   struct tree_niter_desc *niter)
4638{
4639  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4640  struct aff_tree nit, tmpa, tmpb;
4641  enum tree_code comp;
4642  HOST_WIDE_INT step;
4643
4644  /* We need to know that the candidate induction variable does not overflow.
4645     While more complex analysis may be used to prove this, for now just
4646     check that the variable appears in the original program and that it
4647     is computed in a type that guarantees no overflows.  */
4648  cand_type = TREE_TYPE (cand->iv->base);
4649  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4650    return false;
4651
4652  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4653     the calculation of the BOUND could overflow, making the comparison
4654     invalid.  */
4655  if (!data->loop_single_exit_p)
4656    return false;
4657
  /* We need to be able to decide whether the candidate is increasing or
     decreasing in order to choose the right comparison operator.  */
4660  if (!cst_and_fits_in_hwi (cand->iv->step))
4661    return false;
4662  step = int_cst_value (cand->iv->step);
4663
4664  /* Check that the number of iterations matches the expected pattern:
4665     a + 1 > b ? 0 : b - a - 1.  */
4666  mbz = niter->may_be_zero;
4667  if (TREE_CODE (mbz) == GT_EXPR)
4668    {
4669      /* Handle a + 1 > b.  */
4670      tree op0 = TREE_OPERAND (mbz, 0);
4671      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4672	{
4673	  a = TREE_OPERAND (op0, 0);
4674	  b = TREE_OPERAND (mbz, 1);
4675	}
4676      else
4677	return false;
4678    }
4679  else if (TREE_CODE (mbz) == LT_EXPR)
4680    {
4681      tree op1 = TREE_OPERAND (mbz, 1);
4682
4683      /* Handle b < a + 1.  */
4684      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4685        {
4686          a = TREE_OPERAND (op1, 0);
4687          b = TREE_OPERAND (mbz, 0);
4688        }
4689      else
4690	return false;
4691    }
4692  else
4693    return false;
4694
4695  /* Expected number of iterations is B - A - 1.  Check that it matches
4696     the actual number, i.e., that B - A - NITER = 1.  */
4697  tree_to_aff_combination (niter->niter, nit_type, &nit);
4698  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4699  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4700  aff_combination_scale (&nit, -1);
4701  aff_combination_scale (&tmpa, -1);
4702  aff_combination_add (&tmpb, &tmpa);
4703  aff_combination_add (&tmpb, &nit);
4704  if (tmpb.n != 0 || tmpb.offset != 1)
4705    return false;
4706
4707  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4708     overflow.  */
4709  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4710			cand->iv->step,
4711			fold_convert (TREE_TYPE (cand->iv->step), a));
4712  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4713    return false;
4714
4715  /* Determine the new comparison operator.  */
4716  comp = step < 0 ? GT_EXPR : LT_EXPR;
4717  if (*comp_p == NE_EXPR)
4718    *comp_p = comp;
4719  else if (*comp_p == EQ_EXPR)
4720    *comp_p = invert_tree_comparison (comp, false);
4721  else
4722    gcc_unreachable ();
4723
4724  return true;
4725}
4726
4727/* Check whether it is possible to express the condition in USE by comparison
4728   of candidate CAND.  If so, store the value compared with to BOUND, and the
4729   comparison operator to COMP.  */
4730
4731static bool
4732may_eliminate_iv (struct ivopts_data *data,
4733		  struct iv_use *use, struct iv_cand *cand, tree *bound,
4734		  enum tree_code *comp)
4735{
4736  basic_block ex_bb;
4737  edge exit;
4738  tree period;
4739  struct loop *loop = data->current_loop;
4740  aff_tree bnd;
4741  struct tree_niter_desc *desc = NULL;
4742
4743  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4744    return false;
4745
  /* For now this works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside the loop body.  */
4748  ex_bb = gimple_bb (use->stmt);
4749  if (use->stmt != last_stmt (ex_bb)
4750      || gimple_code (use->stmt) != GIMPLE_COND
4751      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4752    return false;
4753
4754  exit = EDGE_SUCC (ex_bb, 0);
4755  if (flow_bb_inside_loop_p (loop, exit->dest))
4756    exit = EDGE_SUCC (ex_bb, 1);
4757  if (flow_bb_inside_loop_p (loop, exit->dest))
4758    return false;
4759
4760  desc = niter_for_exit (data, exit);
4761  if (!desc)
4762    return false;
4763
4764  /* Determine whether we can use the variable to test the exit condition.
4765     This is the case iff the period of the induction variable is greater
4766     than the number of iterations for which the exit condition is true.  */
4767  period = iv_period (cand->iv);
4768
4769  /* If the number of iterations is constant, compare against it directly.  */
4770  if (TREE_CODE (desc->niter) == INTEGER_CST)
4771    {
4772      /* See cand_value_at.  */
4773      if (stmt_after_increment (loop, cand, use->stmt))
4774        {
4775          if (!tree_int_cst_lt (desc->niter, period))
4776            return false;
4777        }
4778      else
4779        {
4780          if (tree_int_cst_lt (period, desc->niter))
4781            return false;
4782        }
4783    }
4784
4785  /* If not, and if this is the only possible exit of the loop, see whether
4786     we can get a conservative estimate on the number of iterations of the
4787     entire loop and compare against that instead.  */
4788  else
4789    {
4790      widest_int period_value, max_niter;
4791
4792      max_niter = desc->max;
4793      if (stmt_after_increment (loop, cand, use->stmt))
4794        max_niter += 1;
4795      period_value = wi::to_widest (period);
4796      if (wi::gtu_p (max_niter, period_value))
4797        {
4798          /* See if we can take advantage of inferred loop bound information.  */
4799          if (data->loop_single_exit_p)
4800            {
4801              if (!max_loop_iterations (loop, &max_niter))
4802                return false;
4803              /* The loop bound is already adjusted by adding 1.  */
4804              if (wi::gtu_p (max_niter, period_value))
4805                return false;
4806            }
4807          else
4808            return false;
4809        }
4810    }
4811
4812  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4813
4814  *bound = fold_convert (TREE_TYPE (cand->iv->base),
4815			 aff_combination_to_tree (&bnd));
4816  *comp = iv_elimination_compare (data, use);
4817
4818  /* It is unlikely that computing the number of iterations using division
4819     would be more profitable than keeping the original induction variable.  */
4820  if (expression_expensive_p (*bound))
4821    return false;
4822
  /* Sometimes it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using
     < instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
4830  if (!integer_zerop (desc->may_be_zero))
4831    return iv_elimination_compare_lt (data, cand, comp, desc);
4832
4833  return true;
4834}
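
/* E.g. an 8-bit iv with step 1 has period 255, so it cannot replace
   the exit test of a loop that may iterate 1000 times, while a
   pointer-sized iv in the same loop passes the period check above.  */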
4835
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */
4838
4839static int
4840parm_decl_cost (struct ivopts_data *data, tree bound)
4841{
4842  tree sbound = bound;
4843  STRIP_NOPS (sbound);
4844
4845  if (TREE_CODE (sbound) == SSA_NAME
4846      && SSA_NAME_IS_DEFAULT_DEF (sbound)
4847      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4848      && data->body_includes_call)
4849    return COSTS_N_INSNS (1);
4850
4851  return 0;
4852}
4853
4854/* Determines cost of basing replacement of USE on CAND in a condition.  */
4855
4856static bool
4857determine_use_iv_cost_condition (struct ivopts_data *data,
4858				 struct iv_use *use, struct iv_cand *cand)
4859{
4860  tree bound = NULL_TREE;
4861  struct iv *cmp_iv;
4862  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4863  comp_cost elim_cost, express_cost, cost, bound_cost;
4864  bool ok;
4865  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4866  tree *control_var, *bound_cst;
4867  enum tree_code comp = ERROR_MARK;
4868
4869  /* Only consider real candidates.  */
4870  if (!cand->iv)
4871    {
4872      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4873		       ERROR_MARK, -1);
4874      return false;
4875    }
4876
4877  /* Try iv elimination.  */
4878  if (may_eliminate_iv (data, use, cand, &bound, &comp))
4879    {
4880      elim_cost = force_var_cost (data, bound, &depends_on_elim);
4881      if (elim_cost.cost == 0)
4882        elim_cost.cost = parm_decl_cost (data, bound);
4883      else if (TREE_CODE (bound) == INTEGER_CST)
4884        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
4891      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4892	{
4893	  elim_inv_expr_id = get_expr_id (data, bound);
4894	  bitmap_clear (depends_on_elim);
4895	}
      /* The bound is a loop invariant, so it will only be computed
	 once.  */
4898      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4899    }
4900  else
4901    elim_cost = infinite_cost;
4902
4903  /* Try expressing the original giv.  If it is compared with an invariant,
4904     note that we cannot get rid of it.  */
4905  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4906			      NULL, &cmp_iv);
4907  gcc_assert (ok);
4908
4909  /* When the condition is a comparison of the candidate IV against
4910     zero, prefer this IV.
4911
4912     TODO: The constant that we're subtracting from the cost should
4913     be target-dependent.  This information should be added to the
4914     target costs for each backend.  */
4915  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4916      && integer_zerop (*bound_cst)
4917      && (operand_equal_p (*control_var, cand->var_after, 0)
4918	  || operand_equal_p (*control_var, cand->var_before, 0)))
4919    elim_cost.cost -= 1;
4920
4921  express_cost = get_computation_cost (data, use, cand, false,
4922				       &depends_on_express, NULL,
4923                                       &express_inv_expr_id);
4924  fd_ivopts_data = data;
4925  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4926
4927  /* Count the cost of the original bound as well.  */
4928  bound_cost = force_var_cost (data, *bound_cst, NULL);
4929  if (bound_cost.cost == 0)
4930    bound_cost.cost = parm_decl_cost (data, *bound_cst);
4931  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4932    bound_cost.cost = 0;
4933  express_cost.cost += bound_cost.cost;
4934
  /* Choose the better approach, preferring the eliminated IV.  */
4936  if (compare_costs (elim_cost, express_cost) <= 0)
4937    {
4938      cost = elim_cost;
4939      depends_on = depends_on_elim;
4940      depends_on_elim = NULL;
4941      inv_expr_id = elim_inv_expr_id;
4942    }
4943  else
4944    {
4945      cost = express_cost;
4946      depends_on = depends_on_express;
4947      depends_on_express = NULL;
4948      bound = NULL_TREE;
4949      comp = ERROR_MARK;
4950      inv_expr_id = express_inv_expr_id;
4951    }
4952
4953  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4954
4955  if (depends_on_elim)
4956    BITMAP_FREE (depends_on_elim);
4957  if (depends_on_express)
4958    BITMAP_FREE (depends_on_express);
4959
4960  return !infinite_cost_p (cost);
4961}
4962
4963/* Determines cost of basing replacement of USE on CAND.  Returns false
4964   if USE cannot be based on CAND.  */
4965
4966static bool
4967determine_use_iv_cost (struct ivopts_data *data,
4968		       struct iv_use *use, struct iv_cand *cand)
4969{
4970  switch (use->type)
4971    {
4972    case USE_NONLINEAR_EXPR:
4973      return determine_use_iv_cost_generic (data, use, cand);
4974
4975    case USE_ADDRESS:
4976      return determine_use_iv_cost_address (data, use, cand);
4977
4978    case USE_COMPARE:
4979      return determine_use_iv_cost_condition (data, use, cand);
4980
4981    default:
4982      gcc_unreachable ();
4983    }
4984}
4985
4986/* Return true if get_computation_cost indicates that autoincrement is
4987   a possibility for the pair of USE and CAND, false otherwise.  */
4988
4989static bool
4990autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4991			   struct iv_cand *cand)
4992{
4993  bitmap depends_on;
4994  bool can_autoinc;
4995  comp_cost cost;
4996
4997  if (use->type != USE_ADDRESS)
4998    return false;
4999
5000  cost = get_computation_cost (data, use, cand, true, &depends_on,
5001			       &can_autoinc, NULL);
5002
5003  BITMAP_FREE (depends_on);
5004
5005  return !infinite_cost_p (cost) && can_autoinc;
5006}
5007
5008/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5009   use that allows autoincrement, and set their AINC_USE if possible.  */
5010
5011static void
5012set_autoinc_for_original_candidates (struct ivopts_data *data)
5013{
5014  unsigned i, j;
5015
5016  for (i = 0; i < n_iv_cands (data); i++)
5017    {
5018      struct iv_cand *cand = iv_cand (data, i);
5019      struct iv_use *closest_before = NULL;
5020      struct iv_use *closest_after = NULL;
5021      if (cand->pos != IP_ORIGINAL)
5022	continue;
5023
5024      for (j = 0; j < n_iv_uses (data); j++)
5025	{
5026	  struct iv_use *use = iv_use (data, j);
5027	  unsigned uid = gimple_uid (use->stmt);
5028
5029	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5030	    continue;
5031
5032	  if (uid < gimple_uid (cand->incremented_at)
5033	      && (closest_before == NULL
5034		  || uid > gimple_uid (closest_before->stmt)))
5035	    closest_before = use;
5036
5037	  if (uid > gimple_uid (cand->incremented_at)
5038	      && (closest_after == NULL
5039		  || uid < gimple_uid (closest_after->stmt)))
5040	    closest_after = use;
5041	}
5042
5043      if (closest_before != NULL
5044	  && autoinc_possible_for_pair (data, closest_before, cand))
5045	cand->ainc_use = closest_before;
5046      else if (closest_after != NULL
5047	       && autoinc_possible_for_pair (data, closest_after, cand))
5048	cand->ainc_use = closest_after;
5049    }
5050}
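
/* E.g. if the loop body dereferences *p directly before the original
   increment p++, that use is CLOSEST_BEFORE, and if the target
   supports the corresponding post-increment addressing, AINC_USE of
   the candidate is set to that use.  */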
5051
5052/* Finds the candidates for the induction variables.  */
5053
5054static void
5055find_iv_candidates (struct ivopts_data *data)
5056{
5057  /* Add commonly used ivs.  */
5058  add_standard_iv_candidates (data);
5059
5060  /* Add old induction variables.  */
5061  add_old_ivs_candidates (data);
5062
5063  /* Add induction variables derived from uses.  */
5064  add_derived_ivs_candidates (data);
5065
5066  set_autoinc_for_original_candidates (data);
5067
5068  /* Record the important candidates.  */
5069  record_important_candidates (data);
5070}
5071
5072/* Determines costs of basing the use of the iv on an iv candidate.  */
5073
5074static void
5075determine_use_iv_costs (struct ivopts_data *data)
5076{
5077  unsigned i, j;
5078  struct iv_use *use;
5079  struct iv_cand *cand;
5080  bitmap to_clear = BITMAP_ALLOC (NULL);
5081
5082  alloc_use_cost_map (data);
5083
5084  for (i = 0; i < n_iv_uses (data); i++)
5085    {
5086      use = iv_use (data, i);
5087
5088      if (data->consider_all_candidates)
5089	{
5090	  for (j = 0; j < n_iv_cands (data); j++)
5091	    {
5092	      cand = iv_cand (data, j);
5093	      determine_use_iv_cost (data, use, cand);
5094	    }
5095	}
5096      else
5097	{
5098	  bitmap_iterator bi;
5099
5100	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5101	    {
5102	      cand = iv_cand (data, j);
5103	      if (!determine_use_iv_cost (data, use, cand))
5104		bitmap_set_bit (to_clear, j);
5105	    }
5106
	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
5109	  bitmap_and_compl_into (use->related_cands, to_clear);
5110	  bitmap_clear (to_clear);
5111	}
5112    }
5113
5114  BITMAP_FREE (to_clear);
5115
5116  if (dump_file && (dump_flags & TDF_DETAILS))
5117    {
5118      fprintf (dump_file, "Use-candidate costs:\n");
5119
5120      for (i = 0; i < n_iv_uses (data); i++)
5121	{
5122	  use = iv_use (data, i);
5123
5124	  fprintf (dump_file, "Use %d:\n", i);
5125	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5126	  for (j = 0; j < use->n_map_members; j++)
5127	    {
5128	      if (!use->cost_map[j].cand
5129		  || infinite_cost_p (use->cost_map[j].cost))
5130		continue;
5131
5132	      fprintf (dump_file, "  %d\t%d\t%d\t",
5133		       use->cost_map[j].cand->id,
5134		       use->cost_map[j].cost.cost,
5135		       use->cost_map[j].cost.complexity);
5136	      if (use->cost_map[j].depends_on)
5137		bitmap_print (dump_file,
5138			      use->cost_map[j].depends_on, "","");
5139              if (use->cost_map[j].inv_expr_id != -1)
5140                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5141	      fprintf (dump_file, "\n");
5142	    }
5143
5144	  fprintf (dump_file, "\n");
5145	}
5146      fprintf (dump_file, "\n");
5147    }
5148}
5149
5150/* Determines cost of the candidate CAND.  */
5151
5152static void
5153determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5154{
5155  comp_cost cost_base;
5156  unsigned cost, cost_step;
5157  tree base;
5158
5159  if (!cand->iv)
5160    {
5161      cand->cost = 0;
5162      return;
5163    }
5164
  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The latter is almost negligible for any loop
     that rolls enough, so we give it only very little weight.  */
5168
5169  base = cand->iv->base;
5170  cost_base = force_var_cost (data, base, NULL);
  /* It will be rare that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will be at least a
     regcopy or a const set.  */
5174  if (cost_base.cost == 0)
5175    cost_base.cost = COSTS_N_INSNS (1);
5176  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5177
5178  cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5179
  /* Prefer the original ivs unless we may gain something by replacing them.
     The reason is to make debugging simpler; this is not relevant for
     artificial ivs created by other optimization passes.  */
5183  if (cand->pos != IP_ORIGINAL
5184      || !SSA_NAME_VAR (cand->var_before)
5185      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5186    cost++;
5187
  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
5190  if (cand->pos == IP_END
5191      && empty_block_p (ip_end_pos (data->current_loop)))
5192    cost++;
5193
5194  cand->cost = cost;
5195  cand->cost_step = cost_step;
5196}
5197
5198/* Determines costs of computation of the candidates.  */
5199
5200static void
5201determine_iv_costs (struct ivopts_data *data)
5202{
5203  unsigned i;
5204
5205  if (dump_file && (dump_flags & TDF_DETAILS))
5206    {
5207      fprintf (dump_file, "Candidate costs:\n");
5208      fprintf (dump_file, "  cand\tcost\n");
5209    }
5210
5211  for (i = 0; i < n_iv_cands (data); i++)
5212    {
5213      struct iv_cand *cand = iv_cand (data, i);
5214
5215      determine_iv_cost (data, cand);
5216
5217      if (dump_file && (dump_flags & TDF_DETAILS))
5218	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5219    }
5220
5221  if (dump_file && (dump_flags & TDF_DETAILS))
5222    fprintf (dump_file, "\n");
5223}
5224
5225/* Calculates cost for having SIZE induction variables.  */
5226
5227static unsigned
5228ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5229{
5230  /* We add size to the cost, so that we prefer eliminating ivs
5231     if possible.  */
5232  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5233					    data->body_includes_call);
5234}
5235
5236/* For each size of the induction variable set determine the penalty.  */
5237
5238static void
5239determine_set_costs (struct ivopts_data *data)
5240{
5241  unsigned j, n;
5242  gphi *phi;
5243  gphi_iterator psi;
5244  tree op;
5245  struct loop *loop = data->current_loop;
5246  bitmap_iterator bi;
5247
5248  if (dump_file && (dump_flags & TDF_DETAILS))
5249    {
5250      fprintf (dump_file, "Global costs:\n");
5251      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5252      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5253      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5254      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5255    }
5256
5257  n = 0;
5258  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5259    {
5260      phi = psi.phi ();
5261      op = PHI_RESULT (phi);
5262
5263      if (virtual_operand_p (op))
5264	continue;
5265
5266      if (get_iv (data, op))
5267	continue;
5268
5269      n++;
5270    }
5271
5272  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5273    {
5274      struct version_info *info = ver_info (data, j);
5275
5276      if (info->inv_id && info->has_nonlin_use)
5277	n++;
5278    }
5279
5280  data->regs_used = n;
5281  if (dump_file && (dump_flags & TDF_DETAILS))
5282    fprintf (dump_file, "  regs_used %d\n", n);
5283
5284  if (dump_file && (dump_flags & TDF_DETAILS))
5285    {
5286      fprintf (dump_file, "  cost for size:\n");
5287      fprintf (dump_file, "  ivs\tcost\n");
5288      for (j = 0; j <= 2 * target_avail_regs; j++)
5289	fprintf (dump_file, "  %d\t%d\n", j,
5290		 ivopts_global_cost_for_size (data, j));
5291      fprintf (dump_file, "\n");
5292    }
5293}
5294
5295/* Returns true if A is a cheaper cost pair than B.  */
5296
5297static bool
5298cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5299{
5300  int cmp;
5301
5302  if (!a)
5303    return false;
5304
5305  if (!b)
5306    return true;
5307
5308  cmp = compare_costs (a->cost, b->cost);
5309  if (cmp < 0)
5310    return true;
5311
5312  if (cmp > 0)
5313    return false;
5314
5315  /* In case the costs are the same, prefer the cheaper candidate.  */
5316  if (a->cand->cost < b->cand->cost)
5317    return true;
5318
5319  return false;
5320}
5321
5322
/* Returns the candidate by which USE is expressed in IVS.  */
5324
5325static struct cost_pair *
5326iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5327{
5328  return ivs->cand_for_use[use->id];
5329}
5330
/* Computes the cost field of the IVS structure.  */
5332
5333static void
5334iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5335{
5336  comp_cost cost = ivs->cand_use_cost;
5337
5338  cost.cost += ivs->cand_cost;
5339
5340  cost.cost += ivopts_global_cost_for_size (data,
5341                                            ivs->n_regs + ivs->num_used_inv_expr);
5342
5343  ivs->cost = cost;
5344}
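
/* That is, the overall cost of the assignment IVS is

     cand_use_cost + cand_cost
       + ivopts_global_cost_for_size (n_regs + num_used_inv_expr),

   i.e. the sum of the per-use costs, the per-candidate costs, and the
   register pressure estimated for the ivs, invariants and invariant
   expressions that are live.  */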
5345
/* Remove invariants in set INVS from set IVS.  */
5347
5348static void
5349iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5350{
5351  bitmap_iterator bi;
5352  unsigned iid;
5353
5354  if (!invs)
5355    return;
5356
5357  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5358    {
5359      ivs->n_invariant_uses[iid]--;
5360      if (ivs->n_invariant_uses[iid] == 0)
5361        ivs->n_regs--;
5362    }
5363}
5364
5365/* Set USE not to be expressed by any candidate in IVS.  */
5366
5367static void
5368iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369		 struct iv_use *use)
5370{
5371  unsigned uid = use->id, cid;
5372  struct cost_pair *cp;
5373
5374  cp = ivs->cand_for_use[uid];
5375  if (!cp)
5376    return;
5377  cid = cp->cand->id;
5378
5379  ivs->bad_uses++;
5380  ivs->cand_for_use[uid] = NULL;
5381  ivs->n_cand_uses[cid]--;
5382
5383  if (ivs->n_cand_uses[cid] == 0)
5384    {
5385      bitmap_clear_bit (ivs->cands, cid);
5386      /* Do not count the pseudocandidates.  */
5387      if (cp->cand->iv)
5388	ivs->n_regs--;
5389      ivs->n_cands--;
5390      ivs->cand_cost -= cp->cand->cost;
5391
5392      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5393    }
5394
5395  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5396
5397  iv_ca_set_remove_invariants (ivs, cp->depends_on);
5398
5399  if (cp->inv_expr_id != -1)
5400    {
5401      ivs->used_inv_expr[cp->inv_expr_id]--;
5402      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5403        ivs->num_used_inv_expr--;
5404    }
5405  iv_ca_recount_cost (data, ivs);
5406}
5407
5408/* Add invariants in set INVS to set IVS.  */
5409
5410static void
5411iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5412{
5413  bitmap_iterator bi;
5414  unsigned iid;
5415
5416  if (!invs)
5417    return;
5418
5419  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5420    {
5421      ivs->n_invariant_uses[iid]++;
5422      if (ivs->n_invariant_uses[iid] == 1)
5423        ivs->n_regs++;
5424    }
5425}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
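
/* Together, iv_ca_set_no_cp and iv_ca_set_cp implement reference counting:
   a candidate occupies a register (and contributes its increment cost)
   only while at least one use is expressed by it, and the same holds for
   the invariants the selected cost pairs depend on.  A sketch of the
   invariant, with u1, u2 and cp_c standing for two uses and a cost pair
   of candidate c obtained from get_use_iv_cost:

     iv_ca_set_cp (data, ivs, u1, cp_c);   n_cand_uses[c]: 0 -> 1, n_regs++
     iv_ca_set_cp (data, ivs, u2, cp_c);   n_cand_uses[c]: 1 -> 2
     iv_ca_set_no_cp (data, ivs, u1);      n_cand_uses[c]: 2 -> 1
     iv_ca_set_no_cp (data, ivs, u2);      n_cand_uses[c]: 1 -> 0, n_regs--  */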

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= use->id);
  ivs->upto++;
  ivs->bad_uses++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      cp = get_use_iv_cost (data, use, cand);
      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  cp = get_use_iv_cost (data, use, cand);
	  if (cheaper_cost_pair (cp, best_cp))
	    best_cp = cp;
	}
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a change record for expressing USE by NEW_CP instead of OLD_CP
   and chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
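
/* A delta list therefore works as an undo log: committing it forward and
   then backward leaves IVS unchanged.  A minimal usage sketch (use,
   old_cp and new_cp stand for values obtained elsewhere):

     delta = iv_ca_delta_add (use, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);    try the change
     cost = iv_ca_cost (ivs);                        evaluate it
     iv_ca_delta_commit (data, ivs, delta, false);   undo it

   This is exactly the trial-evaluation pattern used by iv_ca_extend and
   iv_ca_narrow below.  */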

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it is true,
   the function tries to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
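
/* For example (with hypothetical costs): if use u1 currently costs 4 via
   candidate c1 and would cost 2 via CAND, while u2 costs 3 via c2 and
   would cost 5 via CAND, then with MIN_NCAND false the delta repoints
   only u1, and the returned cost is that of { c1, c2, CAND } with u1
   expressed by CAND.  With MIN_NCAND true both uses are repointed
   regardless of the local costs, biasing the search towards solutions
   with few candidates.  */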

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore the old cp for this use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
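
/* The recursion matters because removing one candidate may make another
   one removable: if candidate c1 was kept only because some use was
   expressed by it, and that use migrates to c2 during narrowing, a
   subsequent pass may find that c1's register and increment cost can be
   saved as well.  The deltas of all passes are joined so that the caller
   commits the complete change at once.  */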

/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for USE than BEST_CP.  If so, return a pointer
   to the corresponding cost pair; otherwise just return BEST_CP.  */

static struct cost_pair*
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = iv_cand (data, cand_idx);
  cp = get_use_iv_cost (data, use, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking the locally optimal fixed-point for IVS by replacing
   candidates that are used by more than one iv use.  For each such
   candidate, this function tries to represent the iv uses under that
   candidate using other ones with lower local cost, then tries to prune
   the new set.  If the new set has lower cost, it returns the new cost
   after recording the candidate replacements in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent uses under the current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further pruning.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
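
/* A hypothetical example of the fixed-point breaking: suppose candidate c
   expresses uses u1 and u2, and candidates c1 and c2 would each express
   one of them more cheaply, but no single move considered by
   iv_ca_extend or iv_ca_prune improves the set.  The combined change

     u1: c -> c1,  u2: c -> c2,  then prune c away

   may still be cheaper; this function tries exactly such multi-use
   replacements.  */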

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break the locally optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selecting algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is that it cannot handle the opposite case,
	     in which different candidates should be chosen with respect to
	     each use.  To solve the problem, we replace candidates in the
	     manner described in the comment of iv_ca_replace, thus giving
	     the general algorithm a chance to break the locally optimal
	     fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

/* Finds the optimal set of induction variables, trying both the strategy
   that prefers the original ivs and the one that prefers generic
   candidates, and keeping the cheaper result.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with the original
     IVs, and then try again using a strategy that prefers candidates not
     based on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might still
	     be pointed to.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
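
/* For illustration (all SSA names and the offset hypothetical): if the iv
   definition

     i_5 = i_1 + 1;

   is an interesting nonlinear use and the selected candidate is the iv
   with variable ivtmp_3 and the same step, the statement may be rewritten
   to compute the value from the candidate instead, e.g.

     i_5 = ivtmp_3 + off;

   where off is the constant difference of the two bases.  The original iv
   then becomes unused and is removed by remove_unused_ivs.  */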

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
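
/* For example (names and values hypothetical), if induction variable
   elimination determined that the exit test

     if (i_1 < n_2)

   can be expressed on candidate ivtmp_3 with comparison NE and bound 100,
   the test is rewritten to

     if (ivtmp_3 != 100)

   with the bound computed in the loop preheader when it is not a
   constant.  In the fallback path the compared iv is merely re-expressed
   in terms of the candidate and the comparison itself is kept.  */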

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
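
/* For instance (names hypothetical), if the removed iv i_1 occurs in the
   debug bind

     # DEBUG i => i_1 * 4

   the count above reaches 2 (one bind, value distinct from i_1), so a
   debug temporary is introduced instead of duplicating the replacement
   expression in every bind:

     # DEBUG D#1 => <i_1 expressed by the chosen candidate>
     # DEBUG i => D#1 * 4  */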

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
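
/* As an end-to-end illustration (hypothetical and target dependent), a
   loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may leave this pass rewritten to use a single pointer iv,

     for (p = a; p != a + n; p++)
       *p = 0;

   combining strength reduction (no multiplication by the element size in
   the address computation), induction variable elimination (the exit test
   compares p instead of i) and removal of the now unused variable i.  */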

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
