1132718Skan/* Loop unswitching for GNU compiler.
2169689Skan   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3132718Skan
4132718SkanThis file is part of GCC.
5132718Skan
6132718SkanGCC is free software; you can redistribute it and/or modify it under
7132718Skanthe terms of the GNU General Public License as published by the Free
8132718SkanSoftware Foundation; either version 2, or (at your option) any later
9132718Skanversion.
10132718Skan
11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
13132718SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14132718Skanfor more details.
15132718Skan
16132718SkanYou should have received a copy of the GNU General Public License
17132718Skanalong with GCC; see the file COPYING.  If not, write to the Free
18169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19169689Skan02110-1301, USA.  */
20132718Skan
21132718Skan#include "config.h"
22132718Skan#include "system.h"
23132718Skan#include "coretypes.h"
24132718Skan#include "tm.h"
25132718Skan#include "rtl.h"
26132718Skan#include "hard-reg-set.h"
27169689Skan#include "obstack.h"
28132718Skan#include "basic-block.h"
29132718Skan#include "cfgloop.h"
30132718Skan#include "cfglayout.h"
31132718Skan#include "params.h"
32132718Skan#include "output.h"
33132718Skan#include "expr.h"
34132718Skan
35132718Skan/* This pass moves constant conditions out of loops, duplicating the loop
36132718Skan   in progress, i.e. this code:
37132718Skan
38132718Skan   while (loop_cond)
39132718Skan     {
40132718Skan       A;
41132718Skan       if (cond)
42132718Skan         branch1;
43132718Skan       else
44132718Skan	 branch2;
45132718Skan       B;
46132718Skan       if (cond)
47132718Skan         branch3;
48132718Skan       C;
49132718Skan     }
50132718Skan   where nothing inside the loop alters cond is transformed
51132718Skan   into
52132718Skan
53132718Skan   if (cond)
54132718Skan     {
55132718Skan       while (loop_cond)
56132718Skan	 {
57132718Skan	   A;
58132718Skan	   branch1;
59132718Skan	   B;
60132718Skan	   branch3;
61132718Skan	   C;
62132718Skan	 }
63132718Skan     }
64132718Skan   else
65132718Skan     {
66132718Skan       while (loop_cond)
67132718Skan	 {
68132718Skan	   A;
69132718Skan	   branch2;
70132718Skan	   B;
71132718Skan	   C;
72132718Skan	 }
73132718Skan     }
74132718Skan
75132718Skan  Duplicating the loop might lead to code growth exponential in number of
76132718Skan  branches inside loop, so we limit the number of unswitchings performed
77132718Skan  in a single loop to PARAM_MAX_UNSWITCH_LEVEL.  We only perform the
78132718Skan  transformation on innermost loops, as the benefit of doing it on loops
79132718Skan  containing subloops would not be very large compared to complications
80132718Skan  with handling this case.  */
81132718Skan
82132718Skanstatic struct loop *unswitch_loop (struct loops *, struct loop *,
83169689Skan				   basic_block, rtx, rtx);
84132718Skanstatic void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
85169689Skanstatic rtx may_unswitch_on (basic_block, struct loop *, rtx *);
86132718Skan
87169689Skan/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
88169689Skan   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
89169689Skan   in order to create a jump.  */
90169689Skan
91169689Skanrtx
92169689Skancompare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
93169689Skan		      rtx cinsn)
94169689Skan{
95169689Skan  rtx seq, jump, cond;
96169689Skan  enum machine_mode mode;
97169689Skan
98169689Skan  mode = GET_MODE (op0);
99169689Skan  if (mode == VOIDmode)
100169689Skan    mode = GET_MODE (op1);
101169689Skan
102169689Skan  start_sequence ();
103169689Skan  if (GET_MODE_CLASS (mode) == MODE_CC)
104169689Skan    {
105169689Skan      /* A hack -- there seems to be no easy generic way how to make a
106169689Skan	 conditional jump from a ccmode comparison.  */
107169689Skan      gcc_assert (cinsn);
108169689Skan      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
109169689Skan      gcc_assert (GET_CODE (cond) == comp);
110169689Skan      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
111169689Skan      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
112169689Skan      emit_jump_insn (copy_insn (PATTERN (cinsn)));
113169689Skan      jump = get_last_insn ();
114169689Skan      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
115169689Skan      LABEL_NUSES (JUMP_LABEL (jump))++;
116169689Skan      redirect_jump (jump, label, 0);
117169689Skan    }
118169689Skan  else
119169689Skan    {
120169689Skan      gcc_assert (!cinsn);
121169689Skan
122169689Skan      op0 = force_operand (op0, NULL_RTX);
123169689Skan      op1 = force_operand (op1, NULL_RTX);
124169689Skan      do_compare_rtx_and_jump (op0, op1, comp, 0,
125169689Skan			       mode, NULL_RTX, NULL_RTX, label);
126169689Skan      jump = get_last_insn ();
127169689Skan      JUMP_LABEL (jump) = label;
128169689Skan      LABEL_NUSES (label)++;
129169689Skan    }
130169689Skan  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
131169689Skan					REG_NOTES (jump));
132169689Skan  seq = get_insns ();
133169689Skan  end_sequence ();
134169689Skan
135169689Skan  return seq;
136169689Skan}
137169689Skan
138132718Skan/* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
139132718Skanvoid
140132718Skanunswitch_loops (struct loops *loops)
141132718Skan{
142132718Skan  int i, num;
143132718Skan  struct loop *loop;
144132718Skan
145132718Skan  /* Go through inner loops (only original ones).  */
146132718Skan  num = loops->num;
147132718Skan
148132718Skan  for (i = 1; i < num; i++)
149132718Skan    {
150132718Skan      /* Removed loop?  */
151132718Skan      loop = loops->parray[i];
152132718Skan      if (!loop)
153132718Skan	continue;
154132718Skan
155132718Skan      if (loop->inner)
156132718Skan	continue;
157132718Skan
158132718Skan      unswitch_single_loop (loops, loop, NULL_RTX, 0);
159132718Skan#ifdef ENABLE_CHECKING
160132718Skan      verify_dominators (CDI_DOMINATORS);
161132718Skan      verify_loop_structure (loops);
162132718Skan#endif
163132718Skan    }
164169689Skan
165169689Skan  iv_analysis_done ();
166132718Skan}
167132718Skan
168132718Skan/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
169169689Skan   basic blocks (for what it means see comments below).  In case condition
170169689Skan   compares loop invariant cc mode register, return the jump in CINSN.  */
171169689Skan
172169689Skanstatic rtx
173169689Skanmay_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
174132718Skan{
175169689Skan  rtx test, at, op[2], stest;
176169689Skan  struct rtx_iv iv;
177132718Skan  unsigned i;
178169689Skan  enum machine_mode mode;
179132718Skan
180132718Skan  /* BB must end in a simple conditional jump.  */
181169689Skan  if (EDGE_COUNT (bb->succs) != 2)
182169689Skan    return NULL_RTX;
183132718Skan  if (!any_condjump_p (BB_END (bb)))
184169689Skan    return NULL_RTX;
185132718Skan
186132718Skan  /* With branches inside loop.  */
187169689Skan  if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
188169689Skan      || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
189169689Skan    return NULL_RTX;
190132718Skan
191132718Skan  /* It must be executed just once each iteration (because otherwise we
192132718Skan     are unable to update dominator/irreducible loop information correctly).  */
193132718Skan  if (!just_once_each_iteration_p (loop, bb))
194169689Skan    return NULL_RTX;
195132718Skan
196169689Skan  /* Condition must be invariant.  */
197169689Skan  test = get_condition (BB_END (bb), &at, true, false);
198132718Skan  if (!test)
199169689Skan    return NULL_RTX;
200132718Skan
201169689Skan  for (i = 0; i < 2; i++)
202169689Skan    {
203169689Skan      op[i] = XEXP (test, i);
204132718Skan
205169689Skan      if (CONSTANT_P (op[i]))
206169689Skan	continue;
207169689Skan
208169689Skan      if (!iv_analyze (at, op[i], &iv))
209169689Skan	return NULL_RTX;
210169689Skan      if (iv.step != const0_rtx
211169689Skan	  || iv.first_special)
212169689Skan	return NULL_RTX;
213169689Skan
214169689Skan      op[i] = get_iv_value (&iv, const0_rtx);
215169689Skan    }
216169689Skan
217169689Skan  mode = GET_MODE (op[0]);
218169689Skan  if (mode == VOIDmode)
219169689Skan    mode = GET_MODE (op[1]);
220169689Skan  if (GET_MODE_CLASS (mode) == MODE_CC)
221169689Skan    {
222169689Skan      if (at != BB_END (bb))
223169689Skan	return NULL_RTX;
224169689Skan
225169689Skan      if (!rtx_equal_p (op[0], XEXP (test, 0))
226169689Skan	  || !rtx_equal_p (op[1], XEXP (test, 1)))
227169689Skan	return NULL_RTX;
228169689Skan
229169689Skan      *cinsn = BB_END (bb);
230169689Skan      return test;
231169689Skan    }
232169689Skan
233169689Skan  stest = simplify_gen_relational (GET_CODE (test), SImode,
234169689Skan				   mode, op[0], op[1]);
235169689Skan  if (stest == const0_rtx
236169689Skan      || stest == const_true_rtx)
237169689Skan    return stest;
238169689Skan
239169689Skan  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
240169689Skan					  op[0], op[1]));
241132718Skan}
242132718Skan
243132718Skan/* Reverses CONDition; returns NULL if we cannot.  */
244169689Skanrtx
245132718Skanreversed_condition (rtx cond)
246132718Skan{
247132718Skan  enum rtx_code reversed;
248132718Skan  reversed = reversed_comparison_code (cond, NULL);
249132718Skan  if (reversed == UNKNOWN)
250132718Skan    return NULL_RTX;
251132718Skan  else
252132718Skan    return gen_rtx_fmt_ee (reversed,
253132718Skan			   GET_MODE (cond), XEXP (cond, 0),
254132718Skan			   XEXP (cond, 1));
255132718Skan}
256132718Skan
257132718Skan/* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
258132718Skan   unswitched on and are therefore known to be true in this LOOP.  NUM is
259132718Skan   number of unswitchings done; do not allow it to grow too much, it is too
260132718Skan   easy to create example on that the code would grow exponentially.  */
261132718Skanstatic void
262132718Skanunswitch_single_loop (struct loops *loops, struct loop *loop,
263132718Skan		      rtx cond_checked, int num)
264132718Skan{
265169689Skan  basic_block *bbs;
266132718Skan  struct loop *nloop;
267132718Skan  unsigned i;
268169689Skan  rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
269132718Skan  int repeat;
270132718Skan  edge e;
271132718Skan
272132718Skan  /* Do not unswitch too much.  */
273132718Skan  if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
274132718Skan    {
275169689Skan      if (dump_file)
276169689Skan	fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
277132718Skan      return;
278132718Skan    }
279132718Skan
280132718Skan  /* Only unswitch innermost loops.  */
281132718Skan  if (loop->inner)
282132718Skan    {
283169689Skan      if (dump_file)
284169689Skan	fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
285132718Skan      return;
286132718Skan    }
287132718Skan
288132718Skan  /* We must be able to duplicate loop body.  */
289132718Skan  if (!can_duplicate_loop_p (loop))
290132718Skan    {
291169689Skan      if (dump_file)
292169689Skan	fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
293132718Skan      return;
294132718Skan    }
295132718Skan
296132718Skan  /* The loop should not be too large, to limit code growth.  */
297132718Skan  if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
298132718Skan    {
299169689Skan      if (dump_file)
300169689Skan	fprintf (dump_file, ";; Not unswitching, loop too big\n");
301132718Skan      return;
302132718Skan    }
303132718Skan
304132718Skan  /* Do not unswitch in cold areas.  */
305132718Skan  if (!maybe_hot_bb_p (loop->header))
306132718Skan    {
307169689Skan      if (dump_file)
308169689Skan	fprintf (dump_file, ";; Not unswitching, not hot area\n");
309132718Skan      return;
310132718Skan    }
311132718Skan
312132718Skan  /* Nor if the loop usually does not roll.  */
313132718Skan  if (expected_loop_iterations (loop) < 1)
314132718Skan    {
315169689Skan      if (dump_file)
316169689Skan	fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
317132718Skan      return;
318132718Skan    }
319132718Skan
320132718Skan  do
321132718Skan    {
322132718Skan      repeat = 0;
323169689Skan      cinsn = NULL_RTX;
324132718Skan
325132718Skan      /* Find a bb to unswitch on.  */
326132718Skan      bbs = get_loop_body (loop);
327169689Skan      iv_analysis_loop_init (loop);
328132718Skan      for (i = 0; i < loop->num_nodes; i++)
329169689Skan	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
330132718Skan	  break;
331132718Skan
332132718Skan      if (i == loop->num_nodes)
333132718Skan	{
334132718Skan	  free (bbs);
335132718Skan	  return;
336132718Skan	}
337132718Skan
338169689Skan      if (cond != const0_rtx
339169689Skan	  && cond != const_true_rtx)
340169689Skan	{
341169689Skan	  rcond = reversed_condition (cond);
342169689Skan	  if (rcond)
343169689Skan	    rcond = canon_condition (rcond);
344132718Skan
345169689Skan	  /* Check whether the result can be predicted.  */
346169689Skan	  for (acond = cond_checked; acond; acond = XEXP (acond, 1))
347169689Skan	    simplify_using_condition (XEXP (acond, 0), &cond, NULL);
348132718Skan	}
349132718Skan
350169689Skan      if (cond == const_true_rtx)
351132718Skan	{
352132718Skan	  /* Remove false path.  */
353169689Skan	  e = FALLTHRU_EDGE (bbs[i]);
354132718Skan	  remove_path (loops, e);
355132718Skan	  free (bbs);
356132718Skan	  repeat = 1;
357132718Skan	}
358169689Skan      else if (cond == const0_rtx)
359132718Skan	{
360132718Skan	  /* Remove true path.  */
361169689Skan	  e = BRANCH_EDGE (bbs[i]);
362132718Skan	  remove_path (loops, e);
363132718Skan	  free (bbs);
364132718Skan	  repeat = 1;
365132718Skan	}
366132718Skan    } while (repeat);
367132718Skan
368132718Skan  /* We found the condition we can unswitch on.  */
369132718Skan  conds = alloc_EXPR_LIST (0, cond, cond_checked);
370132718Skan  if (rcond)
371132718Skan    rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
372132718Skan  else
373132718Skan    rconds = cond_checked;
374132718Skan
375169689Skan  if (dump_file)
376169689Skan    fprintf (dump_file, ";; Unswitching loop\n");
377132718Skan
378132718Skan  /* Unswitch the loop on this condition.  */
379169689Skan  nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
380169689Skan  gcc_assert (nloop);
381132718Skan
382132718Skan  /* Invoke itself on modified loops.  */
383169689Skan  unswitch_single_loop (loops, nloop, rconds, num + 1);
384169689Skan  unswitch_single_loop (loops, loop, conds, num + 1);
385132718Skan
386132718Skan  free_EXPR_LIST_node (conds);
387132718Skan  if (rcond)
388132718Skan    free_EXPR_LIST_node (rconds);
389169689Skan
390169689Skan  free (bbs);
391132718Skan}
392132718Skan
393132718Skan/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
394132718Skan   unswitching of innermost loops.  UNSWITCH_ON must be executed in every
395169689Skan   iteration, i.e. it must dominate LOOP latch.  COND is the condition
396169689Skan   determining which loop is entered.  Returns NULL if impossible, new loop
397169689Skan   otherwise.  The new loop is entered if COND is true.  If CINSN is not
398169689Skan   NULL, it is the insn in that COND is compared.  */
399169689Skan
400132718Skanstatic struct loop *
401169689Skanunswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
402169689Skan	       rtx cond, rtx cinsn)
403132718Skan{
404169689Skan  edge entry, latch_edge, true_edge, false_edge, e;
405169689Skan  basic_block switch_bb, unswitch_on_alt;
406132718Skan  struct loop *nloop;
407132718Skan  sbitmap zero_bitmap;
408169689Skan  int irred_flag, prob;
409169689Skan  rtx seq;
410132718Skan
411132718Skan  /* Some sanity checking.  */
412169689Skan  gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
413169689Skan  gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
414169689Skan  gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
415169689Skan  gcc_assert (!loop->inner);
416169689Skan  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
417169689Skan  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));
418132718Skan
419132718Skan  entry = loop_preheader_edge (loop);
420132718Skan
421132718Skan  /* Make a copy.  */
422132718Skan  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
423132718Skan  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
424132718Skan  zero_bitmap = sbitmap_alloc (2);
425132718Skan  sbitmap_zero (zero_bitmap);
426132718Skan  if (!duplicate_loop_to_header_edge (loop, entry, loops, 1,
427132718Skan	zero_bitmap, NULL, NULL, NULL, 0))
428169689Skan    {
429169689Skan      sbitmap_free (zero_bitmap);
430169689Skan      return NULL;
431169689Skan    }
432169689Skan  sbitmap_free (zero_bitmap);
433132718Skan  entry->flags |= irred_flag;
434132718Skan
435132718Skan  /* Record the block with condition we unswitch on.  */
436169689Skan  unswitch_on_alt = get_bb_copy (unswitch_on);
437169689Skan  true_edge = BRANCH_EDGE (unswitch_on_alt);
438169689Skan  false_edge = FALLTHRU_EDGE (unswitch_on);
439169689Skan  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
440132718Skan
441169689Skan  /* Create a block with the condition.  */
442169689Skan  prob = true_edge->probability;
443169689Skan  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
444169689Skan  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
445169689Skan			      block_label (true_edge->dest),
446169689Skan			      prob, cinsn);
447169689Skan  emit_insn_after (seq, BB_END (switch_bb));
448169689Skan  e = make_edge (switch_bb, true_edge->dest, 0);
449169689Skan  e->probability = prob;
450169689Skan  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
451169689Skan  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
452169689Skan  e->probability = false_edge->probability;
453169689Skan  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
454169689Skan
455132718Skan  if (irred_flag)
456132718Skan    {
457132718Skan      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
458169689Skan      EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
459169689Skan      EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
460132718Skan    }
461132718Skan  else
462132718Skan    {
463132718Skan      switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
464169689Skan      EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
465169689Skan      EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
466132718Skan    }
467132718Skan
468132718Skan  /* Loopify from the copy of LOOP body, constructing the new loop.  */
469132718Skan  nloop = loopify (loops, latch_edge,
470169689Skan		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
471169689Skan		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true);
472132718Skan
473169689Skan  /* Remove branches that are now unreachable in new loops.  */
474169689Skan  remove_path (loops, true_edge);
475169689Skan  remove_path (loops, false_edge);
476132718Skan
477132718Skan  /* One of created loops do not have to be subloop of the outer loop now,
478132718Skan     so fix its placement in loop data structure.  */
479132718Skan  fix_loop_placement (loop);
480132718Skan  fix_loop_placement (nloop);
481132718Skan
482132718Skan  /* Preserve the simple loop preheaders.  */
483132718Skan  loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
484132718Skan  loop_split_edge_with (loop_preheader_edge (nloop), NULL_RTX);
485132718Skan
486132718Skan  return nloop;
487132718Skan}
488