1169689Skan/* Perform doloop optimizations
2169689Skan   Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
3169689Skan   Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4169689Skan
5169689SkanThis file is part of GCC.
6169689Skan
7169689SkanGCC is free software; you can redistribute it and/or modify it under
8169689Skanthe terms of the GNU General Public License as published by the Free
9169689SkanSoftware Foundation; either version 2, or (at your option) any later
10169689Skanversion.
11169689Skan
12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
14169689SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15169689Skanfor more details.
16169689Skan
17169689SkanYou should have received a copy of the GNU General Public License
18169689Skanalong with GCC; see the file COPYING.  If not, write to the Free
19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan02110-1301, USA.  */
21169689Skan
22169689Skan#include "config.h"
23169689Skan#include "system.h"
24169689Skan#include "coretypes.h"
25169689Skan#include "tm.h"
26169689Skan#include "rtl.h"
27169689Skan#include "flags.h"
28169689Skan#include "expr.h"
29169689Skan#include "hard-reg-set.h"
30169689Skan#include "basic-block.h"
31169689Skan#include "toplev.h"
32169689Skan#include "tm_p.h"
33169689Skan#include "cfgloop.h"
34169689Skan#include "output.h"
35169689Skan#include "params.h"
36169689Skan#include "target.h"
37169689Skan
38169689Skan/* This module is used to modify loops with a determinable number of
39169689Skan   iterations to use special low-overhead looping instructions.
40169689Skan
41169689Skan   It first validates whether the loop is well behaved and has a
42169689Skan   determinable number of iterations (either at compile or run-time).
43169689Skan   It then modifies the loop to use a low-overhead looping pattern as
44169689Skan   follows:
45169689Skan
46169689Skan   1. A pseudo register is allocated as the loop iteration counter.
47169689Skan
48169689Skan   2. The number of loop iterations is calculated and is stored
49169689Skan      in the loop counter.
50169689Skan
51169689Skan   3. At the end of the loop, the jump insn is replaced by the
52169689Skan      doloop_end pattern.  The compare must remain because it might be
53169689Skan      used elsewhere.  If the loop-variable or condition register are
54169689Skan      used elsewhere, they will be eliminated by flow.
55169689Skan
56169689Skan   4. An optional doloop_begin pattern is inserted at the top of the
57169689Skan      loop.
58169689Skan
59169689Skan   TODO The optimization should only performed when either the biv used for exit
60169689Skan   condition is unused at all except for the exit test, or if we do not have to
61169689Skan   change its value, since otherwise we have to add a new induction variable,
62169689Skan   which usually will not pay up (unless the cost of the doloop pattern is
63169689Skan   somehow extremely lower than the cost of compare & jump, or unless the bct
64169689Skan   register cannot be used for anything else but doloop -- ??? detect these
65169689Skan   cases).  */
66169689Skan
67169689Skan#ifdef HAVE_doloop_end
68169689Skan
69169689Skan/* Return the loop termination condition for PATTERN or zero
70169689Skan   if it is not a decrement and branch jump insn.  */
71169689Skan
72169689Skanrtx
73169689Skandoloop_condition_get (rtx pattern)
74169689Skan{
75169689Skan  rtx cmp;
76169689Skan  rtx inc;
77169689Skan  rtx reg;
78169689Skan  rtx inc_src;
79169689Skan  rtx condition;
80169689Skan
81169689Skan  /* The canonical doloop pattern we expect is:
82169689Skan
83169689Skan     (parallel [(set (pc) (if_then_else (condition)
84169689Skan                                        (label_ref (label))
85169689Skan                                        (pc)))
86169689Skan                (set (reg) (plus (reg) (const_int -1)))
87169689Skan                (additional clobbers and uses)])
88169689Skan
89169689Skan     Some targets (IA-64) wrap the set of the loop counter in
90169689Skan     an if_then_else too.
91169689Skan
92169689Skan     In summary, the branch must be the first entry of the
93169689Skan     parallel (also required by jump.c), and the second
94169689Skan     entry of the parallel must be a set of the loop counter
95169689Skan     register.  */
96169689Skan
97169689Skan  if (GET_CODE (pattern) != PARALLEL)
98169689Skan    return 0;
99169689Skan
100169689Skan  cmp = XVECEXP (pattern, 0, 0);
101169689Skan  inc = XVECEXP (pattern, 0, 1);
102169689Skan
103169689Skan  /* Check for (set (reg) (something)).  */
104169689Skan  if (GET_CODE (inc) != SET)
105169689Skan    return 0;
106169689Skan  reg = SET_DEST (inc);
107169689Skan  if (! REG_P (reg))
108169689Skan    return 0;
109169689Skan
110169689Skan  /* Check if something = (plus (reg) (const_int -1)).
111169689Skan     On IA-64, this decrement is wrapped in an if_then_else.  */
112169689Skan  inc_src = SET_SRC (inc);
113169689Skan  if (GET_CODE (inc_src) == IF_THEN_ELSE)
114169689Skan    inc_src = XEXP (inc_src, 1);
115169689Skan  if (GET_CODE (inc_src) != PLUS
116169689Skan      || XEXP (inc_src, 0) != reg
117169689Skan      || XEXP (inc_src, 1) != constm1_rtx)
118169689Skan    return 0;
119169689Skan
120169689Skan  /* Check for (set (pc) (if_then_else (condition)
121169689Skan                                       (label_ref (label))
122169689Skan                                       (pc))).  */
123169689Skan  if (GET_CODE (cmp) != SET
124169689Skan      || SET_DEST (cmp) != pc_rtx
125169689Skan      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
126169689Skan      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
127169689Skan      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
128169689Skan    return 0;
129169689Skan
130169689Skan  /* Extract loop termination condition.  */
131169689Skan  condition = XEXP (SET_SRC (cmp), 0);
132169689Skan
133169689Skan  /* We expect a GE or NE comparison with 0 or 1.  */
134169689Skan  if ((GET_CODE (condition) != GE
135169689Skan       && GET_CODE (condition) != NE)
136169689Skan      || (XEXP (condition, 1) != const0_rtx
137169689Skan          && XEXP (condition, 1) != const1_rtx))
138169689Skan    return 0;
139169689Skan
140169689Skan  if ((XEXP (condition, 0) == reg)
141169689Skan      || (GET_CODE (XEXP (condition, 0)) == PLUS
142169689Skan		   && XEXP (XEXP (condition, 0), 0) == reg))
143169689Skan    return condition;
144169689Skan
145169689Skan  /* ??? If a machine uses a funny comparison, we could return a
146169689Skan     canonicalized form here.  */
147169689Skan
148169689Skan  return 0;
149169689Skan}
150169689Skan
151169689Skan/* Return nonzero if the loop specified by LOOP is suitable for
152169689Skan   the use of special low-overhead looping instructions.  DESC
153169689Skan   describes the number of iterations of the loop.  */
154169689Skan
155169689Skanstatic bool
156169689Skandoloop_valid_p (struct loop *loop, struct niter_desc *desc)
157169689Skan{
158169689Skan  basic_block *body = get_loop_body (loop), bb;
159169689Skan  rtx insn;
160169689Skan  unsigned i;
161169689Skan  bool result = true;
162169689Skan
163169689Skan  /* Check for loops that may not terminate under special conditions.  */
164169689Skan  if (!desc->simple_p
165169689Skan      || desc->assumptions
166169689Skan      || desc->infinite)
167169689Skan    {
168169689Skan      /* There are some cases that would require a special attention.
169169689Skan	 For example if the comparison is LEU and the comparison value
170169689Skan	 is UINT_MAX then the loop will not terminate.  Similarly, if the
171169689Skan	 comparison code is GEU and the comparison value is 0, the
172169689Skan	 loop will not terminate.
173169689Skan
174169689Skan	 If the absolute increment is not 1, the loop can be infinite
175169689Skan	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
176169689Skan
177169689Skan	 ??? We could compute these conditions at run-time and have a
178169689Skan	 additional jump around the loop to ensure an infinite loop.
179169689Skan	 However, it is very unlikely that this is the intended
180169689Skan	 behavior of the loop and checking for these rare boundary
181169689Skan	 conditions would pessimize all other code.
182169689Skan
183169689Skan	 If the loop is executed only a few times an extra check to
184169689Skan	 restart the loop could use up most of the benefits of using a
185169689Skan	 count register loop.  Note however, that normally, this
186169689Skan	 restart branch would never execute, so it could be predicted
187169689Skan	 well by the CPU.  We should generate the pessimistic code by
188169689Skan	 default, and have an option, e.g. -funsafe-loops that would
189169689Skan	 enable count-register loops in this case.  */
190169689Skan      if (dump_file)
191169689Skan	fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
192169689Skan      result = false;
193169689Skan      goto cleanup;
194169689Skan    }
195169689Skan
196169689Skan  for (i = 0; i < loop->num_nodes; i++)
197169689Skan    {
198169689Skan      bb = body[i];
199169689Skan
200169689Skan      for (insn = BB_HEAD (bb);
201169689Skan	   insn != NEXT_INSN (BB_END (bb));
202169689Skan	   insn = NEXT_INSN (insn))
203169689Skan	{
204169689Skan	  /* Different targets have different necessities for low-overhead
205169689Skan	     looping.  Call the back end for each instruction within the loop
206169689Skan	     to let it decide whether the insn prohibits a low-overhead loop.
207169689Skan	     It will then return the cause for it to emit to the dump file.  */
208169689Skan	  const char * invalid = targetm.invalid_within_doloop (insn);
209169689Skan	  if (invalid)
210169689Skan	    {
211169689Skan	      if (dump_file)
212169689Skan		fprintf (dump_file, "Doloop: %s\n", invalid);
213169689Skan	      result = false;
214169689Skan	      goto cleanup;
215169689Skan	    }
216169689Skan	}
217169689Skan    }
218169689Skan  result = true;
219169689Skan
220169689Skancleanup:
221169689Skan  free (body);
222169689Skan
223169689Skan  return result;
224169689Skan}
225169689Skan
226169689Skan/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
227169689Skan   edge.  If the condition is always false, do not do anything.  If it is always
228169689Skan   true, redirect E to DEST and return false.  In all other cases, true is
229169689Skan   returned.  */
230169689Skan
231169689Skanstatic bool
232169689Skanadd_test (rtx cond, edge *e, basic_block dest)
233169689Skan{
234169689Skan  rtx seq, jump, label;
235169689Skan  enum machine_mode mode;
236169689Skan  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
237169689Skan  enum rtx_code code = GET_CODE (cond);
238169689Skan  basic_block bb;
239169689Skan
240169689Skan  mode = GET_MODE (XEXP (cond, 0));
241169689Skan  if (mode == VOIDmode)
242169689Skan    mode = GET_MODE (XEXP (cond, 1));
243169689Skan
244169689Skan  start_sequence ();
245169689Skan  op0 = force_operand (op0, NULL_RTX);
246169689Skan  op1 = force_operand (op1, NULL_RTX);
247169689Skan  label = block_label (dest);
248169689Skan  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
249169689Skan
250169689Skan  jump = get_last_insn ();
251169689Skan  if (!JUMP_P (jump))
252169689Skan    {
253169689Skan      /* The condition is always false and the jump was optimized out.  */
254169689Skan      end_sequence ();
255169689Skan      return true;
256169689Skan    }
257169689Skan
258169689Skan  seq = get_insns ();
259169689Skan  end_sequence ();
260169689Skan  bb = loop_split_edge_with (*e, seq);
261169689Skan  *e = single_succ_edge (bb);
262169689Skan
263169689Skan  if (any_uncondjump_p (jump))
264169689Skan    {
265169689Skan      /* The condition is always true.  */
266169689Skan      delete_insn (jump);
267169689Skan      redirect_edge_and_branch_force (*e, dest);
268169689Skan      return false;
269169689Skan    }
270169689Skan
271169689Skan  JUMP_LABEL (jump) = label;
272169689Skan
273169689Skan  /* The jump is supposed to handle an unlikely special case.  */
274169689Skan  REG_NOTES (jump)
275169689Skan	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
276169689Skan			       const0_rtx, REG_NOTES (jump));
277169689Skan  LABEL_NUSES (label)++;
278169689Skan
279169689Skan  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
280169689Skan  return true;
281169689Skan}
282169689Skan
283169689Skan/* Modify the loop to use the low-overhead looping insn where LOOP
284169689Skan   describes the loop, DESC describes the number of iterations of the
285169689Skan   loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
286169689Skan   end of the loop.  CONDITION is the condition separated from the
287169689Skan   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
288169689Skan
289169689Skanstatic void
290169689Skandoloop_modify (struct loop *loop, struct niter_desc *desc,
291169689Skan	       rtx doloop_seq, rtx condition, rtx count)
292169689Skan{
293169689Skan  rtx counter_reg;
294169689Skan  rtx tmp, noloop = NULL_RTX;
295169689Skan  rtx sequence;
296169689Skan  rtx jump_insn;
297169689Skan  rtx jump_label;
298169689Skan  int nonneg = 0;
299169689Skan  bool increment_count;
300169689Skan  basic_block loop_end = desc->out_edge->src;
301169689Skan  enum machine_mode mode;
302169689Skan
303169689Skan  jump_insn = BB_END (loop_end);
304169689Skan
305169689Skan  if (dump_file)
306169689Skan    {
307169689Skan      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
308169689Skan      if (desc->const_iter)
309169689Skan	fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
310169689Skan      else
311169689Skan	fputs ("runtime", dump_file);
312169689Skan      fputs (" iterations).\n", dump_file);
313169689Skan    }
314169689Skan
315169689Skan  /* Discard original jump to continue loop.  The original compare
316169689Skan     result may still be live, so it cannot be discarded explicitly.  */
317169689Skan  delete_insn (jump_insn);
318169689Skan
319169689Skan  counter_reg = XEXP (condition, 0);
320169689Skan  if (GET_CODE (counter_reg) == PLUS)
321169689Skan    counter_reg = XEXP (counter_reg, 0);
322169689Skan  mode = GET_MODE (counter_reg);
323169689Skan
324169689Skan  increment_count = false;
325169689Skan  switch (GET_CODE (condition))
326169689Skan    {
327169689Skan    case NE:
328169689Skan      /* Currently only NE tests against zero and one are supported.  */
329169689Skan      noloop = XEXP (condition, 1);
330169689Skan      if (noloop != const0_rtx)
331169689Skan	{
332169689Skan	  gcc_assert (noloop == const1_rtx);
333169689Skan	  increment_count = true;
334169689Skan	}
335169689Skan      break;
336169689Skan
337169689Skan    case GE:
338169689Skan      /* Currently only GE tests against zero are supported.  */
339169689Skan      gcc_assert (XEXP (condition, 1) == const0_rtx);
340169689Skan
341169689Skan      noloop = constm1_rtx;
342169689Skan
343169689Skan      /* The iteration count does not need incrementing for a GE test.  */
344169689Skan      increment_count = false;
345169689Skan
346169689Skan      /* Determine if the iteration counter will be non-negative.
347169689Skan	 Note that the maximum value loaded is iterations_max - 1.  */
348169689Skan      if (desc->niter_max
349169689Skan	  <= ((unsigned HOST_WIDEST_INT) 1
350169689Skan	      << (GET_MODE_BITSIZE (mode) - 1)))
351169689Skan	nonneg = 1;
352169689Skan      break;
353169689Skan
354169689Skan      /* Abort if an invalid doloop pattern has been generated.  */
355169689Skan    default:
356169689Skan      gcc_unreachable ();
357169689Skan    }
358169689Skan
359169689Skan  if (increment_count)
360169689Skan    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
361169689Skan
362169689Skan  /* Insert initialization of the count register into the loop header.  */
363169689Skan  start_sequence ();
364169689Skan  tmp = force_operand (count, counter_reg);
365169689Skan  convert_move (counter_reg, tmp, 1);
366169689Skan  sequence = get_insns ();
367169689Skan  end_sequence ();
368169689Skan  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
369169689Skan
370169689Skan  if (desc->noloop_assumptions)
371169689Skan    {
372169689Skan      rtx ass = copy_rtx (desc->noloop_assumptions);
373169689Skan      basic_block preheader = loop_preheader_edge (loop)->src;
374169689Skan      basic_block set_zero
375169689Skan	      = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
376169689Skan      basic_block new_preheader
377169689Skan	      = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
378169689Skan      edge te;
379169689Skan
380169689Skan      /* Expand the condition testing the assumptions and if it does not pass,
381169689Skan	 reset the count register to 0.  */
382169689Skan      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
383169689Skan      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
384169689Skan
385169689Skan      set_zero->count = 0;
386169689Skan      set_zero->frequency = 0;
387169689Skan
388169689Skan      te = single_succ_edge (preheader);
389169689Skan      for (; ass; ass = XEXP (ass, 1))
390169689Skan	if (!add_test (XEXP (ass, 0), &te, set_zero))
391169689Skan	  break;
392169689Skan
393169689Skan      if (ass)
394169689Skan	{
395169689Skan	  /* We reached a condition that is always true.  This is very hard to
396169689Skan	     reproduce (such a loop does not roll, and thus it would most
397169689Skan	     likely get optimized out by some of the preceding optimizations).
398169689Skan	     In fact, I do not have any testcase for it.  However, it would
399169689Skan	     also be very hard to show that it is impossible, so we must
400169689Skan	     handle this case.  */
401169689Skan	  set_zero->count = preheader->count;
402169689Skan	  set_zero->frequency = preheader->frequency;
403169689Skan	}
404169689Skan
405169689Skan      if (EDGE_COUNT (set_zero->preds) == 0)
406169689Skan	{
407169689Skan	  /* All the conditions were simplified to false, remove the
408169689Skan	     unreachable set_zero block.  */
409169689Skan	  remove_bb_from_loops (set_zero);
410169689Skan	  delete_basic_block (set_zero);
411169689Skan	}
412169689Skan      else
413169689Skan	{
414169689Skan	  /* Reset the counter to zero in the set_zero block.  */
415169689Skan	  start_sequence ();
416169689Skan	  convert_move (counter_reg, noloop, 0);
417169689Skan	  sequence = get_insns ();
418169689Skan	  end_sequence ();
419169689Skan	  emit_insn_after (sequence, BB_END (set_zero));
420169689Skan
421169689Skan	  set_immediate_dominator (CDI_DOMINATORS, set_zero,
422169689Skan				   recount_dominator (CDI_DOMINATORS,
423169689Skan						      set_zero));
424169689Skan	}
425169689Skan
426169689Skan      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
427169689Skan			       recount_dominator (CDI_DOMINATORS,
428169689Skan						  new_preheader));
429169689Skan    }
430169689Skan
431169689Skan  /* Some targets (eg, C4x) need to initialize special looping
432169689Skan     registers.  */
433169689Skan#ifdef HAVE_doloop_begin
434169689Skan  {
435169689Skan    rtx init;
436169689Skan    unsigned level = get_loop_level (loop) + 1;
437169689Skan    init = gen_doloop_begin (counter_reg,
438169689Skan			     desc->const_iter ? desc->niter_expr : const0_rtx,
439169689Skan			     GEN_INT (desc->niter_max),
440169689Skan			     GEN_INT (level));
441169689Skan    if (init)
442169689Skan      {
443169689Skan	start_sequence ();
444169689Skan	emit_insn (init);
445169689Skan	sequence = get_insns ();
446169689Skan	end_sequence ();
447169689Skan	emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
448169689Skan      }
449169689Skan  }
450169689Skan#endif
451169689Skan
452169689Skan  /* Insert the new low-overhead looping insn.  */
453169689Skan  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
454169689Skan  jump_insn = BB_END (loop_end);
455169689Skan  jump_label = block_label (desc->in_edge->dest);
456169689Skan  JUMP_LABEL (jump_insn) = jump_label;
457169689Skan  LABEL_NUSES (jump_label)++;
458169689Skan
459169689Skan  /* Ensure the right fallthru edge is marked, for case we have reversed
460169689Skan     the condition.  */
461169689Skan  desc->in_edge->flags &= ~EDGE_FALLTHRU;
462169689Skan  desc->out_edge->flags |= EDGE_FALLTHRU;
463169689Skan
464169689Skan  /* Add a REG_NONNEG note if the actual or estimated maximum number
465169689Skan     of iterations is non-negative.  */
466169689Skan  if (nonneg)
467169689Skan    {
468169689Skan      REG_NOTES (jump_insn)
469169689Skan	= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
470169689Skan    }
471169689Skan}
472169689Skan
473169689Skan/* Process loop described by LOOP validating that the loop is suitable for
474169689Skan   conversion to use a low overhead looping instruction, replacing the jump
475169689Skan   insn where suitable.  Returns true if the loop was successfully
476169689Skan   modified.  */
477169689Skan
478169689Skanstatic bool
479169689Skandoloop_optimize (struct loop *loop)
480169689Skan{
481169689Skan  enum machine_mode mode;
482169689Skan  rtx doloop_seq, doloop_pat, doloop_reg;
483169689Skan  rtx iterations, count;
484169689Skan  rtx iterations_max;
485169689Skan  rtx start_label;
486169689Skan  rtx condition;
487169689Skan  unsigned level, est_niter;
488169689Skan  int max_cost;
489169689Skan  struct niter_desc *desc;
490169689Skan  unsigned word_mode_size;
491169689Skan  unsigned HOST_WIDE_INT word_mode_max;
492169689Skan
493169689Skan  if (dump_file)
494169689Skan    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
495169689Skan
496169689Skan  iv_analysis_loop_init (loop);
497169689Skan
498169689Skan  /* Find the simple exit of a LOOP.  */
499169689Skan  desc = get_simple_loop_desc (loop);
500169689Skan
501169689Skan  /* Check that loop is a candidate for a low-overhead looping insn.  */
502169689Skan  if (!doloop_valid_p (loop, desc))
503169689Skan    {
504169689Skan      if (dump_file)
505169689Skan	fprintf (dump_file,
506169689Skan		 "Doloop: The loop is not suitable.\n");
507169689Skan      return false;
508169689Skan    }
509169689Skan  mode = desc->mode;
510169689Skan
511169689Skan  est_niter = 3;
512169689Skan  if (desc->const_iter)
513169689Skan    est_niter = desc->niter;
514169689Skan  /* If the estimate on number of iterations is reliable (comes from profile
515169689Skan     feedback), use it.  Do not use it normally, since the expected number
516169689Skan     of iterations of an unrolled loop is 2.  */
517169689Skan  if (loop->header->count)
518169689Skan    est_niter = expected_loop_iterations (loop);
519169689Skan
520169689Skan  if (est_niter < 3)
521169689Skan    {
522169689Skan      if (dump_file)
523169689Skan	fprintf (dump_file,
524169689Skan		 "Doloop: Too few iterations (%u) to be profitable.\n",
525169689Skan		 est_niter);
526169689Skan      return false;
527169689Skan    }
528169689Skan
529169689Skan  max_cost
530169689Skan    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
531169689Skan  if (rtx_cost (desc->niter_expr, SET) > max_cost)
532169689Skan    {
533169689Skan      if (dump_file)
534169689Skan	fprintf (dump_file,
535169689Skan		 "Doloop: number of iterations too costly to compute.\n");
536169689Skan      return false;
537169689Skan    }
538169689Skan
539169689Skan  count = copy_rtx (desc->niter_expr);
540169689Skan  iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
541169689Skan  iterations_max = GEN_INT (desc->niter_max);
542169689Skan  level = get_loop_level (loop) + 1;
543169689Skan
544169689Skan  /* Generate looping insn.  If the pattern FAILs then give up trying
545169689Skan     to modify the loop since there is some aspect the back-end does
546169689Skan     not like.  */
547169689Skan  start_label = block_label (desc->in_edge->dest);
548169689Skan  doloop_reg = gen_reg_rtx (mode);
549169689Skan  doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
550169689Skan			       GEN_INT (level), start_label);
551169689Skan
552169689Skan  word_mode_size = GET_MODE_BITSIZE (word_mode);
553169689Skan  word_mode_max
554169689Skan	  = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
555169689Skan  if (! doloop_seq
556169689Skan      && mode != word_mode
557169689Skan      /* Before trying mode different from the one in that # of iterations is
558169689Skan	 computed, we must be sure that the number of iterations fits into
559169689Skan	 the new mode.  */
560169689Skan      && (word_mode_size >= GET_MODE_BITSIZE (mode)
561169689Skan	  || desc->niter_max <= word_mode_max))
562169689Skan    {
563169689Skan      if (word_mode_size > GET_MODE_BITSIZE (mode))
564169689Skan	{
565169689Skan	  count = simplify_gen_unary (ZERO_EXTEND, word_mode,
566169689Skan				      count, mode);
567169689Skan	  iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
568169689Skan					   iterations, mode);
569169689Skan	  iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
570169689Skan					       iterations_max, mode);
571169689Skan	}
572169689Skan      else
573169689Skan	{
574169689Skan	  count = lowpart_subreg (word_mode, count, mode);
575169689Skan	  iterations = lowpart_subreg (word_mode, iterations, mode);
576169689Skan	  iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
577169689Skan	}
578169689Skan      PUT_MODE (doloop_reg, word_mode);
579169689Skan      doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
580169689Skan				   GEN_INT (level), start_label);
581169689Skan    }
582169689Skan  if (! doloop_seq)
583169689Skan    {
584169689Skan      if (dump_file)
585169689Skan	fprintf (dump_file,
586169689Skan		 "Doloop: Target unwilling to use doloop pattern!\n");
587169689Skan      return false;
588169689Skan    }
589169689Skan
590169689Skan  /* If multiple instructions were created, the last must be the
591169689Skan     jump instruction.  Also, a raw define_insn may yield a plain
592169689Skan     pattern.  */
593169689Skan  doloop_pat = doloop_seq;
594169689Skan  if (INSN_P (doloop_pat))
595169689Skan    {
596169689Skan      while (NEXT_INSN (doloop_pat) != NULL_RTX)
597169689Skan	doloop_pat = NEXT_INSN (doloop_pat);
598169689Skan      if (JUMP_P (doloop_pat))
599169689Skan	doloop_pat = PATTERN (doloop_pat);
600169689Skan      else
601169689Skan	doloop_pat = NULL_RTX;
602169689Skan    }
603169689Skan
604169689Skan  if (! doloop_pat
605169689Skan      || ! (condition = doloop_condition_get (doloop_pat)))
606169689Skan    {
607169689Skan      if (dump_file)
608169689Skan	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
609169689Skan      return false;
610169689Skan    }
611169689Skan
612169689Skan  doloop_modify (loop, desc, doloop_seq, condition, count);
613169689Skan  return true;
614169689Skan}
615169689Skan
616169689Skan/* This is the main entry point.  Process all LOOPS using doloop_optimize.  */
617169689Skan
618169689Skanvoid
619169689Skandoloop_optimize_loops (struct loops *loops)
620169689Skan{
621169689Skan  unsigned i;
622169689Skan  struct loop *loop;
623169689Skan
624169689Skan  for (i = 1; i < loops->num; i++)
625169689Skan    {
626169689Skan      loop = loops->parray[i];
627169689Skan      if (!loop)
628169689Skan	continue;
629169689Skan
630169689Skan      doloop_optimize (loop);
631169689Skan    }
632169689Skan
633169689Skan  iv_analysis_done ();
634169689Skan
635169689Skan#ifdef ENABLE_CHECKING
636169689Skan  verify_dominators (CDI_DOMINATORS);
637169689Skan  verify_loop_structure (loops);
638169689Skan#endif
639169689Skan}
640169689Skan#endif /* HAVE_doloop_end */
641169689Skan
642