/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "symtab.h"
#include "flags.h"
#include "statistics.h"
#include "double-int.h"
#include "real.h"
#include "fixed-value.h"
#include "alias.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

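/* A minimal usage sketch (hypothetical caller, not part of this file),
   assuming dominators and the loop tree are up to date:

       basic_block *body = get_loop_body (loop);
       for (unsigned i = 0; i < loop->num_nodes; i++)
         if (just_once_each_iteration_p (loop, body[i]))
           ;  (then body[i] executes exactly once per iteration of LOOP)
       free (body);  */
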
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   Uses the loop tree of the current function (CURRENT_LOOPS), which must
   be available.  */

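/* For instance, in the CFG sketched below the cycle {B, C} can be entered
   both at B and at C, so it is not a natural loop; this function flags its
   blocks and edges as irreducible:

	  A
	 / \
	v   v
	B <---> C

   The auxiliary graph built below has one vertex per basic block (BB_REPR)
   and one extra vertex per loop (LOOP_REPR), so that cycles contained
   entirely in a subloop collapse onto that subloop's representative.  */
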
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting a loop should lead from the representative of the
	   son of the nearest common ancestor of the loops in which ACT
	   lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* Edge E in graph G is irreducible if it connects two vertices in
	   the same SCC.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower number.  */
	gcc_assert (g->vertices[ge->src].component
		    >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

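/* A hypothetical caller sketch (not part of this file): a pass that must
   not disturb irreducible regions would typically do

       if (mark_irreducible_loops ())
	 honor the flags while walking the CFG, e.g. skip any block with
	 (bb->flags & BB_IRREDUCIBLE_LOOP) and any edge with
	 (e->flags & EDGE_IRREDUCIBLE_LOOP).  */
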
/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;	/* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

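/* Worked example with hypothetical numbers: if the loop header has
   frequency 1000 and a body block has frequency 500 and 4 non-debug insns,
   then RATIO is 500 * BB_FREQ_MAX / 1000 == BB_FREQ_MAX / 2, the block
   contributes 4 * BB_FREQ_MAX / 2 to NINSNS, and the final division by
   BB_FREQ_MAX turns that into 2 insns per average iteration.  */
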
/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}

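/* Worked example with hypothetical profile counts: if the edges entering
   the loop have a combined count of 100 (COUNT_IN) and the latch edge has
   a count of 950 (COUNT_LATCH), the function returns
   (950 + 100 - 1) / 100 == 10, i.e. COUNT_LATCH / COUNT_IN rounded up.  */
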
/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

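/* For example, an innermost loop has level 0, a loop whose only subloop is
   innermost has level 1, and each further level of nesting below LOOP adds
   one.  */
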
/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}

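/* Worked example with hypothetical numbers: with TARGET_AVAIL_REGS == 16,
   TARGET_RES_REGS == 3 and CALL_P false, asking for N_NEW == 2 registers
   around a loop with N_OLD == 12 gives REGS_NEEDED == 14: the cheap exit
   (14 + 3 <= 16) fails, 14 <= 16 holds, so the estimate is
   2 * target_reg_cost[speed].  With N_OLD == 16 it would instead be
   2 * target_spill_cost[speed], before any IRA halving.  */
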
/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Returns the exit edge if LOOP has exactly one exit that is likely
   to be executed at run time (i.e. it is not an EH edge or an edge
   leading to a noreturn call).  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
	continue;
      /* The constant of 5 is chosen so that noreturn calls are ruled
	 out by this test.  The static branch prediction algorithm will
	 not assign such a low probability to conditionals for usual
	 reasons.  */
      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
	  && ex->probability < 5 && !ex->count)
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }
  exits.release ();
  return found;
}

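/* A hypothetical caller sketch (not part of this file): unrolling-style
   transformations that need one plausible exit can use

       edge ex = single_likely_exit (loop);
       if (ex == NULL)
	 give up (no exit, or more than one likely exit)

   rather than insisting on the stricter single_exit.  */
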
/* Returns the hottest path through LOOP: starting at the header, repeatedly
   follow the most probable successor edge that neither exits the loop nor
   leads to an already visited block.  The header is the 0-th block of the
   returned vector.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}