/* Loop unrolling.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "profile.h"
#include "predict.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "params.h"
#include "insn-codes.h"
#include "optabs.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "hash-table.h"
#include "recog.h"
#include "target.h"
#include "dumpfile.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of the code, better opportunities for further optimization passes and,
   in some cases, from fewer tests of exit conditions.  The main cost is
   code growth, which hurts performance through its effect on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations.
   -- simple unrolling of the remaining loops; this is performed only if
      we are asked to, as the gain is questionable in this case and often
      it may even slow down the code.
   For more detailed descriptions of each of those, see the comments at
   the appropriate functions below.

   There are many parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of
   several %.  */
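
/* As a quick illustration of the three cases (the loops below are
   editorial sketches, not taken from any particular test case):

     for (i = 0; i < 100; i++)	-- rolls a constant number of times,
       a[i] = 0;		   handled by LPT_UNROLL_CONSTANT;

     for (i = 0; i < n; i++)	-- the iteration count is computable
       a[i] = 0;		   at runtime, LPT_UNROLL_RUNTIME;

     while ((p = p->next))	-- the iteration count is unknown,
       p->val = 0;		   LPT_UNROLL_STUPID, done only when
				   unrolling of all loops is requested
				   (UAP_UNROLL_ALL).  */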

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
};
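
/* For illustration (an editorial walk-through of the fields above): with
   two copies recorded in VAR_EXPANSIONS, get_expansion below cycles
   REUSE_EXPANSION through 0 -> 1 -> 2 -> 0 -> ..., returning REG,
   var_expansions[0], var_expansions[1], REG, ... in turn, so that the
   uses are distributed evenly over the available copies.  */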

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : typed_free_remove <iv_to_split>
{
  typedef iv_to_split value_type;
  typedef iv_to_split compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const value_type *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}
/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : typed_free_remove <var_to_expand>
{
  typedef var_to_expand value_type;
  typedef var_to_expand compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const value_type *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}

/* Information about the optimizations applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};

static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
                   "loop unrolled %d times",
                   loop->lpt_decision.times);
  if (profile_info)
    dump_printf (report_flags,
                 " (header execution count %d)",
                 (int)loop->header->count);

  dump_printf (report_flags, "\n");
}

/* Decide whether to unroll loops and by how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (TDF_RTL, locus,
                         ";; *** Considering loop %d at BB %d for "
                         "unrolling ***\n",
                         loop->num, loop->header->index);

      /* Do not unroll cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll loops; FLAGS controls which kinds of unrolling are allowed.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* First, decide how much to unroll.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of LOOP is at the end of the loop body.  */
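/* E.g. a loop of the source form "do { body; } while (cond);" tests its
   exit condition at the end of the body, whereas "while (cond) body;"
   tests it at the beginning; the check below works on the CFG shape
   (the exit test's in-loop successor must be an empty latch), not on
   the source form.  */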

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and by how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to unroll; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);
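
  /* A worked example (editorial; assuming the default parameter values
     from params.def at the time of writing: max-unrolled-insns 200,
     max-average-unrolled-insns 80, max-unroll-times 8): a loop with
     ninsns == 20 and av_ninsns == 10 gets
     nunroll = MIN (200/20, 80/10, 8) = 8, possibly lowered further by
     the target hook above.  */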

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check the number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also the loop bounds and profile; in case the loop has more
     than one exit, it may well loop less than the determined maximal
     number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
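  /* A worked example with made-up numbers: for niter == 102 and
     nunroll == 4 the loop below tries i = 10 .. 3 and, for an exit at
     the beginning of the loop, computes
     n_copies = niter % (i + 1) + i + 1:

	i = 10: 102 % 11 + 11 == 14	i = 6: 102 % 7 + 7 == 11
	i =  9: 102 % 10 + 10 == 12	i = 5: 102 % 6 + 6 ==  6
	i =  8: 102 %  9 +  9 == 12	i = 4: 102 % 5 + 5 ==  7
	i =  7: 102 %  8 +  8 == 14	i = 3: 102 % 4 + 4 ==  6

     so best_unroll becomes 5: six copies of the body, and since
     102 % 6 == 0 no iterations need to be peeled off.  */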
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}

/* Unroll LOOP with a constant number of iterations LOOP->LPT_DECISION.TIMES
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test of the
	 exit condition have a contiguous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case where we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number
   of times and by how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to unroll; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simplicity.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
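
  /* E.g. nunroll == 6 leaves the loop above with i == 4, the largest
     power of 2 not exceeding nunroll, so we decide to unroll i - 1 == 3
     times, i.e. to use an unroll factor of 4.  */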

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do comparison
	in a mode that needs comparison by parts, we cannot analyze the
	number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
	branching code seems to be only accessed for signed division.  The
	divisions used by # of iterations analysis are always unsigned.
	Problems might arise on architectures that emit branching code
	for some operations that may appear in the unroller (especially
	for division), but we know of no such architecture.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq, *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = label;
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}

/* Unroll LOOP, for which we are able to count the number of iterations
   at runtime, LOOP->LPT_DECISION.TIMES times.  The transformation does
   this (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get the expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Compute the modulo by ANDing niter with max_unroll; we use the fact
     that the unroll factor (max_unroll + 1) is a power of two, and thus
     this is correct even if there is overflow in the computation.  */
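  /* E.g. with max_unroll == 3 (an unroll factor of 4), niter & 3 equals
     niter % 4 even for values of niter that wrapped around during the
     computation above.  */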
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of the loop body (almost always we must leave
     the exit test here; the only exception is when we have an extra zero
     check and the number of iterations is reliable).  Also record the
     place of the (possible) extra zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create the item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
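  /* E.g. (made-up numbers) if OLD_NITER evaluates to 102 and
     max_unroll == 3 (factor 4), the preconditioning above has executed
     102 & 3 == 2 iterations and the unrolled body runs 102 / 4 == 25
     times.  */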
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and by how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to unroll; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simplicity.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}

/* Unroll LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We may indeed get here, provided that proving the loop to be
	 really simple required nontrivial assumptions.  We could update
	 the counts, but the problem is that we are unable to decide
	 which exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of an
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation, will yield wrong
	 results for (x = something - x); thus avoid using it in that
	 case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace chains of the form

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}

/* Determine which of the insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Record the start of the
   duplicated area in OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */
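/* E.g. when unrolling with N_COPIES == 3, the initialization is in the
   original body (copy 0) and the new copies are numbered 1 to 3, so the
   delta is simply N_COPY.  When peeling, the copy containing the
   initialization is number 1, the following peeled copies are 2, 3, ...,
   and the original body, which runs last, is number 0 and thus gets the
   largest delta, N_COPIES.  */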

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}

/* Allocate the basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert the initialization of the basic variable of IVTS before INSN,
   taking the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}

/* Return one expansion of the accumulator recorded in VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
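
/* The round-robin reuse above, illustrated (assuming three recorded
   expansions r1, r2 and r3): successive calls return ve->reg, r1, r2,
   r3, ve->reg, r1, ... so once the expansion limit is reached, the
   accumulator copies are cycled through evenly.  */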

/* Given INSN, replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
        ve->var_expansions.safe_push (new_reg);
        ve->expansion_count++;
      }
}
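
/* For illustration (a hypothetical accumulator, assuming the
   expansion limit is 2): unrolling

     sum += a[i];

   four times rewrites the accumulator in the copies roughly as

     sum  += a[i];      (original body keeps SUM)
     sum1 += a[i+1];    (first new expansion)
     sum2 += a[i+2];    (second new expansion)
     sum  += a[i+3];    (limit reached; get_expansion reuses SUM)

   which breaks the dependence chain on SUM; sum1 and sum2 are folded
   back into sum at the loop exit.  */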

/* Initialize the variable expansions in the loop preheader.  PLACE is
   the loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus, to honor signed zero.  This
   way we prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of SUM should be -zero; thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
                                     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          if (honor_signed_zero_p)
            zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
          else
            zero_init = CONST0_RTX (mode);
          emit_move_insn (var, zero_init);
        }
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          zero_init = CONST1_RTX (GET_MODE (var));
          emit_move_insn (var, zero_init);
        }
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}

/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
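
/* Continuing the hypothetical SUM example above: with expansions
   sum1 and sum2, this emits

     sum = sum2 + (sum1 + sum);

   right after the basic-block note of PLACE, folding the partial
   accumulators back into the original register.  MULT expansions
   (initialized to 1) are combined with multiplication instead.  */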

/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: we
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
        remove_note (insn, note);
        return;
      }
}
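
/* For example (a hypothetical note): if INSN carries

     (expr_list:REG_EQUAL (plus:SI (reg i) (const_int 16)) ...)

   and (reg i) is the original register of an IV being split, the note
   may no longer hold in some copies, so it is removed.  */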

/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as happens in complete unrolling, but not in ordinary
   peeling of the loop).  */
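
/* In outline (a schematic of the code below, not a specification):
   for every basic block created by duplicate_loop_to_header_edge we
   compute the copy's DELTA from bb->aux, walk its insns in lockstep
   with the insns of the original block, and apply split_iv and
   expand_var_during_unrolling to the matching pairs; afterwards, if
   REWRITE_ORIGINAL_LOOP, the original body is rewritten as well.  */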

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the base variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t) bb->aux, n_copies,
                                        unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
        {
          if (!INSN_P (insn)
              || (DEBUG_INSN_P (insn)
                  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
            continue;

          while (!INSN_P (orig_insn)
                 || (DEBUG_INSN_P (orig_insn)
                     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
                         == LABEL_DECL)))
            orig_insn = NEXT_INSN (orig_insn);

          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, insn);

              ivts = opt_info->insns_to_split->find (&ivts_templ);

              if (ivts)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));

                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = (struct var_to_expand *)
                opt_info->insns_with_var_to_expand->find (&ve_templ);
              if (ves)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Also rewrite the original loop body.  Find its blocks as the
     originals of the blocks in the last copied iteration, i.e. those
     that have get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
        continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
            continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

              ivts = (struct iv_to_split *)
                opt_info->insns_to_split->find (&ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }
        }
    }
}

/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}
