/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl-error.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "stringpool.h"
#include "flags.h"
#include "except.h"
#include "hard-reg-set.h"
#include "function.h"
#include "hashtab.h"
#include "rtl.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "libfuncs.h"
#include "regs.h"
#include "recog.h"
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
#include "target.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "gimplify.h"
#include "tree-pass.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "df.h"
#include "params.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"
#include "valtrack.h"

#ifdef HAVE_simple_return

/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
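/* As an informal illustration (not tied to any particular target): an
   insn that sets a call-saved register which is live anywhere in the
   function requires the frame, because the prologue must then save and
   restore that register; an insn that only sets a call-clobbered
   scratch register and uses nothing set up by the prologue does not.  */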
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
			   REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}

/* See whether there is a single live edge out of BB whose destination
   uses any of [REGNO, END_REGNO).  Return that edge if its destination
   block has one or two predecessors; otherwise return NULL.  */
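/* Informally: if BB ends in a conditional branch and the value in
   [REGNO, END_REGNO) is live into only one of the successors, that
   successor is where a move of the register can be sunk; if the value
   is live into more than one successor, NULL is returned and the move
   stays where it is.  */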

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest has exactly two predecessors, the caller can
     still split the edge and create a new block on it, which satisfies
     the single-predecessor requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the sets of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has been split.  */
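/* A typical candidate (hypothetical RTL): the entry block contains
   (set (reg %rbx) (reg %rdi)), saving an incoming argument in a
   call-saved register that is only needed on the path containing a
   call.  Sinking the copy onto that path keeps the entry block free of
   frame-requiring insns.  */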

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register,
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */
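  /* For example (illustrative only): (plus (reg %rdi) (const_int 8))
     contains a single inner register and is treated like a move from
     %rdi, whereas (plus (reg %rdi) (reg %rsi)) mentions two registers
     and is rejected by the walk below.  */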

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once per function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      if (MAY_HAVE_DEBUG_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all,
     so skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */
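/* A rough sketch of the loop below: insns are scanned from the end of
   the entry block towards its start; each insn that cannot be sunk
   contributes its defs and uses to DEFS and USES, which then block any
   earlier insn whose source or destination would overlap them.  */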

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;
  unsigned int i;
  struct dead_debug_local debug;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap
	 tries to sink copies from parameter registers to callee-saved
	 registers out of the entry block.
	 copyprop_hardreg_forward_bb_without_debug_insn is called first to
	 remove some of the dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  dead_debug_local_init (&debug, NULL, NULL);
  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);

  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p, &debug))
      {
	/* Add all defined registers to DEFS.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      for (i = REGNO (x); i < END_REGNO (x); i++)
		SET_HARD_REG_BIT (defs, i);
	  }

	/* Add all used registers to USES.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      for (i = REGNO (x); i < END_REGNO (x); i++)
		SET_HARD_REG_BIT (uses, i);
	  }
      }

  dead_debug_local_finish (&debug, NULL);
}

/* Create a copy of BB's instructions and insert them at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
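/* (Informal note: this is ordinary tail duplication.  If BB is a small
   shared return sequence reached both from blocks that need the
   prologue and from blocks that do not, the copy serves the
   prologue-free paths while the original keeps serving the rest.)  */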
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
			bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
	       bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
	int freq = EDGE_FREQUENCY (e);
	copy_bb->count += e->count;
	copy_bb->frequency += EDGE_FREQUENCY (e);
	e->dest->count -= e->count;
	if (e->dest->count < 0)
	  e->dest->count = 0;
	e->dest->frequency -= freq;
	if (e->dest->frequency < 0)
	  e->dest->frequency = 0;
	redirect_edge_and_branch_force (e, copy_bb);
	continue;
      }
    else
      ei_next (&ei);
}


/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */
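/* An informal outline of the code below (a sketch, not a contract):
   (1) compute the registers used and clobbered by PROLOGUE_SEQ;
   (2) mark every block containing an insn that requires the stack
       frame, and everything reachable from such a block;
   (3) collect duplicatable "tail" blocks that reach the exit without
       needing a prologue;
   (4) compute the ANTIC set and look for a single edge from a
       non-ANTIC block into the ANTIC region, which becomes the new
       *ENTRY_EDGE where the prologue will be emitted;
   (5) duplicate the shared tails so that prologue-free paths can
       bypass the prologue entirely.  */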

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is a predecessor of e->src which doesn't
		   need a prologue and the edge is complex, we might
		   not be able to redirect the branch to a copy of
		   e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; they can be duplicated to be used on paths not
	 needing a prologue.  */
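      /* Put informally: a block ends up in ANTIC when every path
	 leaving it eventually needs the prologue, so emitting the
	 prologue on entry to the ANTIC region never runs it on a path
	 that could have avoided it.  */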
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) which is live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		rtx_insn *insert_point;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		insert_point = emit_note_after (NOTE_INSN_DELETED,
						BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}

/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */
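/* (Illustrative: if an otherwise-empty block just before the exit is
   reached by several conditional jumps, convert_jumps_to_returns may
   turn some of them into conditional simple_returns; the edges it
   cannot convert are collected in *UNCONVERTED_SIMPLE_RETURNS so that
   convert_to_simple_return can redirect them later.)  */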

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
	 (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
		  = convert_jumps_to_returns (e->src, true,
					      *unconverted_simple_returns);
	}
    }

  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}

/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */
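/* (Illustrative: each unconverted edge is redirected to a block that
   contains nothing but a simple_return; one such block is reused or
   created per partition, hot and cold, to respect section placement.)  */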

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      basic_block bb;
	      rtx_insn *start;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      start = emit_jump_insn_after (gen_simple_return (),
					    BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}

#endif