1/* Code to analyze doloop loops in order for targets to perform late
2   optimizations converting doloops to other forms of hardware loops.
3   Copyright (C) 2011-2015 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "flags.h"
27#include "symtab.h"
28#include "hashtab.h"
29#include "hash-set.h"
30#include "vec.h"
31#include "machmode.h"
32#include "hard-reg-set.h"
33#include "input.h"
34#include "function.h"
35#include "statistics.h"
36#include "double-int.h"
37#include "real.h"
38#include "fixed-value.h"
39#include "alias.h"
40#include "wide-int.h"
41#include "inchash.h"
42#include "tree.h"
43#include "insn-config.h"
44#include "expmed.h"
45#include "dojump.h"
46#include "explow.h"
47#include "calls.h"
48#include "emit-rtl.h"
49#include "varasm.h"
50#include "stmt.h"
51#include "expr.h"
52#include "regs.h"
53#include "predict.h"
54#include "dominance.h"
55#include "cfg.h"
56#include "cfgrtl.h"
57#include "basic-block.h"
58#include "tm_p.h"
59#include "df.h"
60#include "cfgloop.h"
61#include "recog.h"
62#include "target.h"
63#include "hw-doloop.h"
64#include "dumpfile.h"
65
66#ifdef HAVE_doloop_end
67
68/* Dump information collected in LOOPS.  */
69static void
70dump_hwloops (hwloop_info loops)
71{
72  hwloop_info loop;
73
74  for (loop = loops; loop; loop = loop->next)
75    {
76      hwloop_info i;
77      basic_block b;
78      unsigned ix;
79
80      fprintf (dump_file, ";; loop %d: ", loop->loop_no);
81      if (loop->bad)
82	fprintf (dump_file, "(bad) ");
83      fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
84	       loop->head == NULL ? -1 : loop->head->index,
85	       loop->depth, REGNO (loop->iter_reg));
86
87      fprintf (dump_file, " blocks: [ ");
88      for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
89	fprintf (dump_file, "%d ", b->index);
90      fprintf (dump_file, "] ");
91
92      fprintf (dump_file, " inner loops: [ ");
93      for (ix = 0; loop->loops.iterate (ix, &i); ix++)
94	fprintf (dump_file, "%d ", i->loop_no);
95      fprintf (dump_file, "]\n");
96    }
97  fprintf (dump_file, "\n");
98}
99
100/* Return true if BB is part of LOOP.  */
101static bool
102bb_in_loop_p (hwloop_info loop, basic_block bb)
103{
104  return bitmap_bit_p (loop->block_bitmap, bb->index);
105}
106
107/* Scan the blocks of LOOP (and its inferiors), and record things such as
108   hard registers set, jumps out of and within the loop.  */
109static void
110scan_loop (hwloop_info loop)
111{
112  unsigned ix;
113  basic_block bb;
114
115  if (loop->bad)
116    return;
117
118  if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
119		       REGNO (loop->iter_reg)))
120    loop->iter_reg_used_outside = true;
121
122  for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
123    {
124      rtx_insn *insn;
125      edge e;
126      edge_iterator ei;
127
128      if (bb != loop->tail)
129	FOR_EACH_EDGE (e, ei, bb->succs)
130	  {
131	    if (bb_in_loop_p (loop, e->dest))
132	      {
133		if (!(e->flags & EDGE_FALLTHRU))
134		  loop->jumps_within = true;
135	      }
136	    else
137	      {
138		loop->jumps_outof = true;
139		if (!loop->bad)
140		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
141						REGNO (loop->iter_reg)));
142	      }
143	  }
144
145      for (insn = BB_HEAD (bb);
146	   insn != NEXT_INSN (BB_END (bb));
147	   insn = NEXT_INSN (insn))
148	{
149	  df_ref def;
150	  HARD_REG_SET set_this_insn;
151
152	  if (!NONDEBUG_INSN_P (insn))
153	    continue;
154
155	  if (recog_memoized (insn) < 0
156	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
157		  || asm_noperands (PATTERN (insn)) >= 0))
158	    loop->has_asm = true;
159
160	  CLEAR_HARD_REG_SET (set_this_insn);
161	  FOR_EACH_INSN_DEF (def, insn)
162	    {
163	      rtx dreg = DF_REF_REG (def);
164
165	      if (!REG_P (dreg))
166		continue;
167
168	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
169				   REGNO (dreg));
170	    }
171
172	  if (insn == loop->loop_end)
173	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
174	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
175	    loop->iter_reg_used = true;
176	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
177	}
178    }
179}
180
181/* Compute the incoming_dest and incoming_src members of LOOP by
182   identifying the edges that jump into the loop.  If there is more
183   than one block that jumps into the loop, incoming_src will be set
184   to NULL; likewise, if there is more than one block in the loop that
185   is the destination of an incoming edge, incoming_dest will be NULL.
186
187   Return true if either of these two fields is nonnull, false
188   otherwise.  */
189static bool
190process_incoming_edges (hwloop_info loop)
191{
192  edge e;
193  edge_iterator ei;
194  bool first = true;
195
196  FOR_EACH_EDGE (e, ei, loop->incoming)
197    {
198      if (first)
199	{
200	  loop->incoming_src = e->src;
201	  loop->incoming_dest = e->dest;
202	  first = false;
203	}
204      else
205	{
206	  if (e->dest != loop->incoming_dest)
207	    loop->incoming_dest = NULL;
208	  if (e->src != loop->incoming_src)
209	    loop->incoming_src = NULL;
210	}
211    }
212  if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
213    return false;
214
215  return true;
216}
217
218/* Try to identify a forwarder block that jump into LOOP, and add it to
219   the set of blocks in the loop, updating the vector of incoming blocks as
220   well.  This transformation gives a second chance to loops we couldn't
221   otherwise handle by increasing the chance that we'll end up with one
222   incoming_src block.
223   Return true if we made a change, false otherwise.  */
224static bool
225add_forwarder_blocks (hwloop_info loop)
226{
227  edge e;
228  edge_iterator ei;
229
230  FOR_EACH_EDGE (e, ei, loop->incoming)
231    {
232      if (forwarder_block_p (e->src))
233	{
234	  edge e2;
235	  edge_iterator ei2;
236
237	  if (dump_file)
238	    fprintf (dump_file,
239		     ";; Adding forwarder block %d to loop %d and retrying\n",
240		     e->src->index, loop->loop_no);
241	  loop->blocks.safe_push (e->src);
242	  bitmap_set_bit (loop->block_bitmap, e->src->index);
243	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
244	    vec_safe_push (loop->incoming, e2);
245	  loop->incoming->unordered_remove (ei.index);
246	  return true;
247	}
248    }
249  return false;
250}
251
252/* Called from reorg_loops when a potential loop end is found.  LOOP is
253   a newly set up structure describing the loop, it is this function's
254   responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
255   loop_end insn and its enclosing basic block.  REG is the loop counter
256   register.
257   For our purposes, a loop is defined by the set of blocks reachable from
258   the loop head in which the loop counter register is live.  This matches
259   the expected use; targets that call into this code usually replace the
260   loop counter with a different special register.  */
261static void
262discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
263{
264  bool found_tail;
265  unsigned dwork = 0;
266  basic_block bb;
267
268  loop->tail = tail_bb;
269  loop->loop_end = tail_insn;
270  loop->iter_reg = reg;
271  vec_alloc (loop->incoming, 2);
272  loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
273
274  if (EDGE_COUNT (tail_bb->succs) != 2)
275    {
276      loop->bad = true;
277      return;
278    }
279  loop->head = BRANCH_EDGE (tail_bb)->dest;
280  loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
281
282  auto_vec<basic_block, 20> works;
283  works.safe_push (loop->head);
284
285  found_tail = false;
286  for (dwork = 0; works.iterate (dwork, &bb); dwork++)
287    {
288      edge e;
289      edge_iterator ei;
290      if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
291	{
292	  /* We've reached the exit block.  The loop must be bad. */
293	  if (dump_file)
294	    fprintf (dump_file,
295		     ";; Loop is bad - reached exit block while scanning\n");
296	  loop->bad = true;
297	  break;
298	}
299
300      if (bitmap_bit_p (loop->block_bitmap, bb->index))
301	continue;
302
303      /* We've not seen this block before.  Add it to the loop's
304	 list and then add each successor to the work list.  */
305
306      loop->blocks.safe_push (bb);
307      bitmap_set_bit (loop->block_bitmap, bb->index);
308
309      if (bb == tail_bb)
310	found_tail = true;
311      else
312	{
313	  FOR_EACH_EDGE (e, ei, bb->succs)
314	    {
315	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
316	      if (REGNO_REG_SET_P (df_get_live_in (succ),
317				   REGNO (loop->iter_reg)))
318		works.safe_push (succ);
319	    }
320	}
321    }
322
323  if (!found_tail)
324    loop->bad = true;
325
326  /* Find the predecessor, and make sure nothing else jumps into this loop.  */
327  if (!loop->bad)
328    {
329      FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
330	{
331	  edge e;
332	  edge_iterator ei;
333	  FOR_EACH_EDGE (e, ei, bb->preds)
334	    {
335	      basic_block pred = e->src;
336
337	      if (!bb_in_loop_p (loop, pred))
338		{
339		  if (dump_file)
340		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
341			     loop->loop_no, pred->index,
342			     e->dest->index);
343		  vec_safe_push (loop->incoming, e);
344		}
345	    }
346	}
347
348      if (!process_incoming_edges (loop))
349	{
350	  if (dump_file)
351	    fprintf (dump_file,
352		     ";; retrying loop %d with forwarder blocks\n",
353		     loop->loop_no);
354	  if (!add_forwarder_blocks (loop))
355	    {
356	      if (dump_file)
357		fprintf (dump_file, ";; No forwarder blocks found\n");
358	      loop->bad = true;
359	    }
360	  else if (!process_incoming_edges (loop))
361	    {
362	      if (dump_file)
363		fprintf (dump_file,
364			 ";; can't find suitable entry for loop %d\n",
365			 loop->loop_no);
366	    }
367	}
368    }
369}
370
371/* Analyze the structure of the loops in the current function.  Use
372   LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
373   hardware loops found in this function.  HOOKS is the argument
374   passed to reorg_loops, used here to find the iteration registers
375   from a loop_end pattern.  */
376static hwloop_info
377discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
378{
379  hwloop_info loops = NULL;
380  hwloop_info loop;
381  basic_block bb;
382  int nloops = 0;
383
384  /* Find all the possible loop tails.  This means searching for every
385     loop_end instruction.  For each one found, create a hwloop_info
386     structure and add the head block to the work list. */
387  FOR_EACH_BB_FN (bb, cfun)
388    {
389      rtx_insn *tail = BB_END (bb);
390      rtx_insn *insn;
391      rtx reg;
392
393      while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
394	tail = PREV_INSN (tail);
395
396      if (tail == NULL_RTX)
397	continue;
398
399      if (!JUMP_P (tail))
400	continue;
401      reg = hooks->end_pattern_reg (tail);
402      if (reg == NULL_RTX)
403	continue;
404
405      /* A possible loop end */
406
407      /* There's a degenerate case we can handle - an empty loop consisting
408	 of only a back branch.  Handle that by deleting the branch.  */
409      insn = JUMP_LABEL_AS_INSN (tail);
410      while (insn && !NONDEBUG_INSN_P (insn))
411	insn = NEXT_INSN (insn);
412      if (insn == tail)
413	{
414	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
415	  if (dump_file)
416	    {
417	      fprintf (dump_file, ";; degenerate loop ending at\n");
418	      print_rtl_single (dump_file, tail);
419	    }
420	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
421	    {
422	      if (dump_file)
423		fprintf (dump_file, ";; deleting it\n");
424	      delete_insn_and_edges (tail);
425	      continue;
426	    }
427	}
428
429      loop = XCNEW (struct hwloop_info_d);
430      loop->next = loops;
431      loops = loop;
432      loop->loop_no = nloops++;
433      loop->blocks.create (20);
434      loop->block_bitmap = BITMAP_ALLOC (loop_stack);
435
436      if (dump_file)
437	{
438	  fprintf (dump_file, ";; potential loop %d ending at\n",
439		   loop->loop_no);
440	  print_rtl_single (dump_file, tail);
441	}
442
443      discover_loop (loop, bb, tail, reg);
444    }
445
446  /* Compute loop nestings.  Given two loops A and B, either the sets
447     of their blocks don't intersect at all, or one is the subset of
448     the other, or the blocks don't form a good nesting structure.  */
449  for (loop = loops; loop; loop = loop->next)
450    {
451      hwloop_info other;
452
453      if (loop->bad)
454	continue;
455
456      for (other = loops; other; other = other->next)
457	{
458	  if (other->bad)
459	    continue;
460
461	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
462	    continue;
463	  if (!bitmap_intersect_compl_p (other->block_bitmap,
464					 loop->block_bitmap))
465	    loop->loops.safe_push (other);
466	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
467					      other->block_bitmap))
468	    other->loops.safe_push (loop);
469	  else
470	    {
471	      if (dump_file)
472		fprintf (dump_file,
473			 ";; can't find suitable nesting for loops %d and %d\n",
474			 loop->loop_no, other->loop_no);
475	      loop->bad = other->bad = true;
476	    }
477	}
478    }
479
480  if (dump_file)
481    dump_hwloops (loops);
482
483  return loops;
484}
485
486/* Free up the loop structures in LOOPS.  */
487static void
488free_loops (hwloop_info loops)
489{
490  while (loops)
491    {
492      hwloop_info loop = loops;
493      loops = loop->next;
494      loop->loops.release ();
495      loop->blocks.release ();
496      BITMAP_FREE (loop->block_bitmap);
497      XDELETE (loop);
498    }
499}
500
501#define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
502
503/* Initialize the aux fields to give ascending indices to basic blocks.  */
504static void
505set_bb_indices (void)
506{
507  basic_block bb;
508  intptr_t index;
509
510  index = 0;
511  FOR_EACH_BB_FN (bb, cfun)
512    bb->aux = (void *) index++;
513}
514
515/* The taken-branch edge from the loop end can actually go forward.
516   If the target's hardware loop support requires that the loop end be
517   after the loop start, try to reorder a loop's basic blocks when we
518   find such a case.
519   This is not very aggressive; it only moves at most one block.  It
520   does not introduce new branches into loops; it may remove them, or
521   it may switch fallthru/jump edges.  */
522static void
523reorder_loops (hwloop_info loops)
524{
525  basic_block bb;
526  hwloop_info loop;
527
528  cfg_layout_initialize (0);
529
530  set_bb_indices ();
531
532  for (loop = loops; loop; loop = loop->next)
533    {
534      edge e;
535      edge_iterator ei;
536
537      if (loop->bad)
538	continue;
539
540      if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
541	continue;
542
543      FOR_EACH_EDGE (e, ei, loop->head->succs)
544	{
545	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
546	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
547	    {
548	      basic_block start_bb = e->dest;
549	      basic_block start_prev_bb = start_bb->prev_bb;
550
551	      if (dump_file)
552		fprintf (dump_file, ";; Moving block %d before block %d\n",
553			 loop->head->index, start_bb->index);
554	      loop->head->prev_bb->next_bb = loop->head->next_bb;
555	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
556
557	      loop->head->prev_bb = start_prev_bb;
558	      loop->head->next_bb = start_bb;
559	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
560
561	      set_bb_indices ();
562	      break;
563	    }
564	}
565      loops = loops->next;
566    }
567
568  FOR_EACH_BB_FN (bb, cfun)
569    {
570      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
571	bb->aux = bb->next_bb;
572      else
573	bb->aux = NULL;
574    }
575  cfg_layout_finalize ();
576  clear_aux_for_blocks ();
577  df_analyze ();
578}
579
580/* Call the OPT function for LOOP and all of its sub-loops.  This is
581   done in a depth-first search; innermost loops are visited first.
582   OPTIMIZE and FAIL are the functions passed to reorg_loops by the
583   target's reorg pass.  */
584static void
585optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
586{
587  int ix;
588  hwloop_info inner;
589  int inner_depth = 0;
590
591  if (loop->visited)
592    return;
593
594  loop->visited = 1;
595
596  if (loop->bad)
597    {
598      if (dump_file)
599	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
600      goto bad_loop;
601    }
602
603  /* Every loop contains in its list of inner loops every loop nested inside
604     it, even if there are intermediate loops.  This works because we're doing
605     a depth-first search here and never visit a loop more than once.
606     Recursion depth is effectively limited by the number of available
607     hardware registers.  */
608  for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
609    {
610      optimize_loop (inner, hooks);
611
612      if (!inner->bad && inner_depth < inner->depth)
613	inner_depth = inner->depth;
614      /* The set of registers may be changed while optimizing the inner
615	 loop.  */
616      IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
617    }
618
619  loop->depth = inner_depth + 1;
620
621  if (hooks->opt (loop))
622    return;
623
624 bad_loop:
625  if (dump_file)
626    fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
627
628  loop->bad = true;
629  hooks->fail (loop);
630}
631
632/* This function can be used from a port's machine_dependent_reorg to
633   find and analyze loops that end in loop_end instructions.  It uses
634   a set of function pointers in HOOKS to call back into the
635   target-specific functions to perform the actual machine-specific
636   transformations.
637
638   Such transformations typically involve additional set-up
639   instructions before the loop, to define loop bounds or set up a
640   special loop counter register.
641
642   DO_REORDER should be set to true if we should try to use the
643   reorder_loops function to ensure the loop end occurs after the loop
644   start.  This is for use by targets where the loop hardware requires
645   this condition.
646
647   HOOKS is used to pass in target specific hooks; see
648   hw-doloop.h.  */
649void
650reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
651{
652  hwloop_info loops = NULL;
653  hwloop_info loop;
654  bitmap_obstack loop_stack;
655
656  df_live_add_problem ();
657  df_live_set_all_dirty ();
658  df_analyze ();
659
660  bitmap_obstack_initialize (&loop_stack);
661
662  if (dump_file)
663    fprintf (dump_file, ";; Find loops, first pass\n\n");
664
665  loops = discover_loops (&loop_stack, hooks);
666
667  /* We can't enter cfglayout mode anymore if basic block partitioning
668     already happened.  */
669  if (do_reorder && !flag_reorder_blocks_and_partition)
670    {
671      reorder_loops (loops);
672      free_loops (loops);
673
674      if (dump_file)
675	fprintf (dump_file, ";; Find loops, second pass\n\n");
676
677      loops = discover_loops (&loop_stack, hooks);
678    }
679
680  for (loop = loops; loop; loop = loop->next)
681    scan_loop (loop);
682
683  /* Now apply the optimizations.  */
684  for (loop = loops; loop; loop = loop->next)
685    optimize_loop (loop, hooks);
686
687  if (dump_file)
688    {
689      fprintf (dump_file, ";; After hardware loops optimization:\n\n");
690      dump_hwloops (loops);
691    }
692
693  free_loops (loops);
694  bitmap_obstack_release (&loop_stack);
695
696  if (dump_file)
697    print_rtl (dump_file, get_insns ());
698}
699#endif
700