/* Array prefetching.
   Copyright (C) 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "toplev.h"
#include "params.h"
#include "langhooks.h"

/* This pass inserts prefetch instructions to optimize cache usage during
   accesses to arrays in loops.  It processes loops sequentially and:

   1) Gathers all memory references in the loop.
   2) For each of the references it decides when it is profitable to prefetch
      it.  To do so, we evaluate the reuse among the accesses, and determine
      two values: PREFETCH_BEFORE (meaning that it only makes sense to do
      prefetching in the first PREFETCH_BEFORE iterations of the loop) and
      PREFETCH_MOD (meaning that it only makes sense to prefetch in the
      iterations of the loop that are zero modulo PREFETCH_MOD).  For example
      (assuming cache line size is 64 bytes, char has size 1 byte and there
      is no hardware sequential prefetch):

      char *a;
      for (i = 0; i < max; i++)
	{
	  a[255] = ...;		(0)
	  a[i] = ...;		(1)
	  a[i + 64] = ...;	(2)
	  a[16*i] = ...;	(3)
	  a[187*i] = ...;	(4)
	  a[187*i + 50] = ...;	(5)
	}

       (0) obviously has PREFETCH_BEFORE 1
       (1) has PREFETCH_BEFORE 64, since (2) accesses the same memory
           location 64 iterations before it, and PREFETCH_MOD 64 (since
	   it hits the same cache line otherwise).
       (2) has PREFETCH_MOD 64
       (3) has PREFETCH_MOD 4
       (4) has PREFETCH_MOD 1.  We do not set PREFETCH_BEFORE here, since
           the cache line accessed by (4) is the same with probability only
	   7/32.
       (5) has PREFETCH_MOD 1 as well.

   3) We determine how far ahead we need to prefetch.  The number of
      iterations needed is time to fetch / time spent in one iteration of
      the loop.  The problem is that we do not know either of these values,
      so we just make a heuristic guess based on a magic (possibly)
      target-specific constant and the size of the loop.

   4) Determine which of the references we prefetch.  We take into account
      that there is a maximum number of simultaneous prefetches (provided
      by the machine description).  We issue as many prefetches as possible
      while still within this bound (starting with those with the lowest
      prefetch_mod, since they are responsible for most of the cache
      misses).

   5) We unroll and peel loops so that we are able to satisfy PREFETCH_MOD
      and PREFETCH_BEFORE requirements (within some bounds), and to avoid
      prefetching nonaccessed memory.
      TODO -- actually implement peeling.

   6) We actually emit the prefetch instructions.  ??? Perhaps emit the
      prefetch instructions with guards in cases where 5) was not sufficient
      to satisfy the constraints?

   Some other TODO:
      -- write and use more general reuse analysis (that could also be used
	 in other cache aimed loop optimizations)
      -- make it behave sanely together with the prefetches given by the user
	 (now we just ignore them; at the very least we should avoid
	 optimizing loops in which the user put his own prefetches)
      -- we assume cache line size alignment of arrays; this could be
	 improved.  */
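
/* As a rough illustration of the transformation (only a sketch -- the actual
   unroll factor, prefetch distance and address arithmetic depend on the
   target parameters defined below), a loop such as

      for (i = 0; i < n; i++)
	sum += a[i];

   over an array of 4-byte integers with a 32-byte cache line may end up as

      for (i = 0; i + 8 <= n; i += 8)
	{
	  __builtin_prefetch (&a[i + 32], 0);
	  sum += a[i];
	  ...
	  sum += a[i + 7];
	}
      ... epilogue loop for the remaining iterations ...  */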

/* Magic constants follow.  These should be replaced by machine specific
   numbers.  */

/* A number that should roughly correspond to the number of instructions
   executed before the prefetch is completed.  */

#ifndef PREFETCH_LATENCY
#define PREFETCH_LATENCY 200
#endif

/* Number of prefetches that can run at the same time.  */

#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif

/* True if a write can be prefetched by a read prefetch.  */

#ifndef WRITE_CAN_USE_READ_PREFETCH
#define WRITE_CAN_USE_READ_PREFETCH 1
#endif

/* True if a read can be prefetched by a write prefetch.  */

#ifndef READ_CAN_USE_WRITE_PREFETCH
#define READ_CAN_USE_WRITE_PREFETCH 0
#endif

/* Cache line size.  Assumed to be a power of two.  */

#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#endif

/* Do we have forward hardware sequential prefetching?  */

#ifndef HAVE_FORWARD_PREFETCH
#define HAVE_FORWARD_PREFETCH 0
#endif

/* Do we have backward hardware sequential prefetching?  */

#ifndef HAVE_BACKWARD_PREFETCH
#define HAVE_BACKWARD_PREFETCH 0
#endif

/* In some cases we are only able to determine that there is a certain
   probability that the two accesses hit the same cache line.  In this
   case, we issue the prefetches for both of them if this probability
   is less than (1000 - ACCEPTABLE_MISS_RATE) per mille.  */

#ifndef ACCEPTABLE_MISS_RATE
#define ACCEPTABLE_MISS_RATE 50
#endif

#ifndef HAVE_prefetch
#define HAVE_prefetch 0
#endif

/* A group of memory references among which reuse may occur.  */

struct mem_ref_group
{
  tree base;			/* Base of the reference.  */
  HOST_WIDE_INT step;		/* Step of the reference.  */
  struct mem_ref *refs;		/* References in the group.  */
  struct mem_ref_group *next;	/* Next group of references.  */
};

/* Assigned to PREFETCH_BEFORE when all iterations are to be prefetched.  */

#define PREFETCH_ALL		(~(unsigned HOST_WIDE_INT) 0)

/* The memory reference.  */

struct mem_ref
{
  tree stmt;			/* Statement in which the reference appears.  */
  tree mem;			/* The reference.  */
  HOST_WIDE_INT delta;		/* Constant offset of the reference.  */
  bool write_p;			/* Is it a write?  */
  struct mem_ref_group *group;	/* The group of references it belongs to.  */
  unsigned HOST_WIDE_INT prefetch_mod;
				/* Prefetch only each PREFETCH_MOD-th
				   iteration.  */
  unsigned HOST_WIDE_INT prefetch_before;
				/* Prefetch only first PREFETCH_BEFORE
				   iterations.  */
  bool issue_prefetch_p;	/* Should we really issue the prefetch?  */
  struct mem_ref *next;		/* The next reference in the group.  */
};

/* Dumps information about reference REF to FILE.  */

static void
dump_mem_ref (FILE *file, struct mem_ref *ref)
{
  fprintf (file, "Reference %p:\n", (void *) ref);

  fprintf (file, "  group %p (base ", (void *) ref->group);
  print_generic_expr (file, ref->group->base, TDF_SLIM);
  fprintf (file, ", step ");
  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->group->step);
  fprintf (file, ")\n");

  fprintf (file, "  delta ");
  fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->delta);
  fprintf (file, "\n");

  fprintf (file, "  %s\n", ref->write_p ? "write" : "read");

  fprintf (file, "\n");
}

/* Finds a group with BASE and STEP in GROUPS, or creates one if it does not
   exist.  */

static struct mem_ref_group *
find_or_create_group (struct mem_ref_group **groups, tree base,
		      HOST_WIDE_INT step)
{
  struct mem_ref_group *group;

  for (; *groups; groups = &(*groups)->next)
    {
      if ((*groups)->step == step
	  && operand_equal_p ((*groups)->base, base, 0))
	return *groups;

      /* Keep the list of groups sorted by decreasing step.  */
      if ((*groups)->step < step)
	break;
    }

  group = xcalloc (1, sizeof (struct mem_ref_group));
  group->base = base;
  group->step = step;
  group->refs = NULL;
  group->next = *groups;
  *groups = group;

  return group;
}

/* Records a memory reference MEM in GROUP with offset DELTA and write status
   WRITE_P.  The reference occurs in statement STMT.  */

static void
record_ref (struct mem_ref_group *group, tree stmt, tree mem,
	    HOST_WIDE_INT delta, bool write_p)
{
  struct mem_ref **aref;

  /* Do not record the same address twice.  */
  for (aref = &group->refs; *aref; aref = &(*aref)->next)
    {
      /* It need not be possible for a write reference to reuse a read
	 prefetch, or vice versa.  */
      if (!WRITE_CAN_USE_READ_PREFETCH
	  && write_p
	  && !(*aref)->write_p)
	continue;
      if (!READ_CAN_USE_WRITE_PREFETCH
	  && !write_p
	  && (*aref)->write_p)
	continue;

      if ((*aref)->delta == delta)
	return;
    }

  (*aref) = xcalloc (1, sizeof (struct mem_ref));
  (*aref)->stmt = stmt;
  (*aref)->mem = mem;
  (*aref)->delta = delta;
  (*aref)->write_p = write_p;
  (*aref)->prefetch_before = PREFETCH_ALL;
  (*aref)->prefetch_mod = 1;
  (*aref)->issue_prefetch_p = false;
  (*aref)->group = group;
  (*aref)->next = NULL;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_mem_ref (dump_file, *aref);
}

/* Release memory references in GROUPS.  */

static void
release_mem_refs (struct mem_ref_group *groups)
{
  struct mem_ref_group *next_g;
  struct mem_ref *ref, *next_r;

  for (; groups; groups = next_g)
    {
      next_g = groups->next;
      for (ref = groups->refs; ref; ref = next_r)
	{
	  next_r = ref->next;
	  free (ref);
	}
      free (groups);
    }
}

/* A structure used to pass arguments to idx_analyze_ref.  */

struct ar_data
{
  struct loop *loop;			/* Loop of the reference.  */
  tree stmt;				/* Statement of the reference.  */
  HOST_WIDE_INT *step;			/* Step of the memory reference.  */
  HOST_WIDE_INT *delta;			/* Offset of the memory reference.  */
};

/* Analyzes a single INDEX of a memory reference to obtain information
   described in analyze_ref.  Callback for for_each_index.  */

static bool
idx_analyze_ref (tree base, tree *index, void *data)
{
  struct ar_data *ar_data = data;
  tree ibase, step, stepsize;
  HOST_WIDE_INT istep, idelta = 0, imult = 1;
  affine_iv iv;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  if (!simple_iv (ar_data->loop, ar_data->stmt, *index, &iv, false))
    return false;
  ibase = iv.base;
  step = iv.step;

  if (zero_p (step))
    istep = 0;
  else
    {
      if (!cst_and_fits_in_hwi (step))
	return false;
      istep = int_cst_value (step);
    }

  if (TREE_CODE (ibase) == PLUS_EXPR
      && cst_and_fits_in_hwi (TREE_OPERAND (ibase, 1)))
    {
      idelta = int_cst_value (TREE_OPERAND (ibase, 1));
      ibase = TREE_OPERAND (ibase, 0);
    }
  if (cst_and_fits_in_hwi (ibase))
    {
      idelta += int_cst_value (ibase);
      ibase = build_int_cst (TREE_TYPE (ibase), 0);
    }

  if (TREE_CODE (base) == ARRAY_REF)
    {
      stepsize = array_ref_element_size (base);
      if (!cst_and_fits_in_hwi (stepsize))
	return false;
      imult = int_cst_value (stepsize);

      istep *= imult;
      idelta *= imult;
    }

  *ar_data->step += istep;
  *ar_data->delta += idelta;
  *index = ibase;

  return true;
}

/* Tries to express REF_P in the form &BASE + STEP * iter + DELTA, where DELTA
   and STEP are integer constants and iter is the number of iterations of
   LOOP.  The reference occurs in statement STMT.  Strips nonaddressable
   component references from REF_P.  */
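
/* For instance, for the access a[i + 7] to an array of 4-byte integers,
   in a loop whose induction variable i starts at zero, the reference is
   expressed as &a[0] + 4 * iter + 28, i.e. STEP is 4 and DELTA is 28.  */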

static bool
analyze_ref (struct loop *loop, tree *ref_p, tree *base,
	     HOST_WIDE_INT *step, HOST_WIDE_INT *delta,
	     tree stmt)
{
  struct ar_data ar_data;
  tree off;
  HOST_WIDE_INT bit_offset;
  tree ref = *ref_p;

  *step = 0;
  *delta = 0;

  /* First strip off the component references.  Ignore bitfields.  */
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (ref, 1)))
    ref = TREE_OPERAND (ref, 0);

  *ref_p = ref;

  for (; TREE_CODE (ref) == COMPONENT_REF; ref = TREE_OPERAND (ref, 0))
    {
      off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1));
      bit_offset = TREE_INT_CST_LOW (off);
      gcc_assert (bit_offset % BITS_PER_UNIT == 0);

      *delta += bit_offset / BITS_PER_UNIT;
    }

  *base = unshare_expr (ref);
  ar_data.loop = loop;
  ar_data.stmt = stmt;
  ar_data.step = step;
  ar_data.delta = delta;
  return for_each_index (base, idx_analyze_ref, &ar_data);
}

/* Record a memory reference REF to the list REFS.  The reference occurs in
   LOOP in statement STMT and it is a write if WRITE_P is true.  */

static void
gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
			      tree ref, bool write_p, tree stmt)
{
  tree base;
  HOST_WIDE_INT step, delta;
  struct mem_ref_group *agrp;

  if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt))
    return;

  /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
     are integer constants.  */
  agrp = find_or_create_group (refs, base, step);
  record_ref (agrp, stmt, ref, delta, write_p);
}

/* Record the suitable memory references in LOOP.  */

static struct mem_ref_group *
gather_memory_references (struct loop *loop)
{
  basic_block *body = get_loop_body_in_dom_order (loop);
  basic_block bb;
  unsigned i;
  block_stmt_iterator bsi;
  tree stmt, lhs, rhs;
  struct mem_ref_group *refs = NULL;

  /* Scan the loop body in order, so that earlier references precede the
     later ones.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (bb->loop_father != loop)
	continue;

      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  stmt = bsi_stmt (bsi);
	  if (TREE_CODE (stmt) != MODIFY_EXPR)
	    continue;

	  lhs = TREE_OPERAND (stmt, 0);
	  rhs = TREE_OPERAND (stmt, 1);

	  if (REFERENCE_CLASS_P (rhs))
	    gather_memory_references_ref (loop, &refs, rhs, false, stmt);
	  if (REFERENCE_CLASS_P (lhs))
	    gather_memory_references_ref (loop, &refs, lhs, true, stmt);
	}
    }
  free (body);

  return refs;
}

/* Prune the prefetch candidate REF using self-reuse.  */

static void
prune_ref_by_self_reuse (struct mem_ref *ref)
{
  HOST_WIDE_INT step = ref->group->step;
  bool backward = step < 0;

  if (step == 0)
    {
      /* Prefetch references to an invariant address just once.  */
      ref->prefetch_before = 1;
      return;
    }

  if (backward)
    step = -step;

  if (step > PREFETCH_BLOCK)
    return;

  if ((backward && HAVE_BACKWARD_PREFETCH)
      || (!backward && HAVE_FORWARD_PREFETCH))
    {
      ref->prefetch_before = 1;
      return;
    }

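  /* Otherwise the reference needs to be prefetched only once per cache line;
     e.g. with a 32-byte PREFETCH_BLOCK and a step of 4 bytes, only every
     8th iteration.  */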
  ref->prefetch_mod = PREFETCH_BLOCK / step;
}

/* Divides X by BY, rounding down.  */

static HOST_WIDE_INT
ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
{
  gcc_assert (by > 0);

  if (x >= 0)
    return x / by;
  else
    return (x + by - 1) / by;
}

/* Prune the prefetch candidate REF using the reuse with BY.
   If BY_IS_BEFORE is true, BY is before REF in the loop.  */

static void
prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
			  bool by_is_before)
{
  HOST_WIDE_INT step = ref->group->step;
  bool backward = step < 0;
  HOST_WIDE_INT delta_r = ref->delta, delta_b = by->delta;
  HOST_WIDE_INT delta = delta_b - delta_r;
  HOST_WIDE_INT hit_from;
  unsigned HOST_WIDE_INT prefetch_before, prefetch_block;

  if (delta == 0)
    {
      /* If the references have the same address, only prefetch the
	 former.  */
      if (by_is_before)
	ref->prefetch_before = 0;

      return;
    }

  if (!step)
    {
      /* If the reference addresses are invariant and fall into the
	 same cache line, prefetch just the first one.  */
      if (!by_is_before)
	return;

      if (ddown (ref->delta, PREFETCH_BLOCK)
	  != ddown (by->delta, PREFETCH_BLOCK))
	return;

      ref->prefetch_before = 0;
      return;
    }

  /* Only prune the reference that is behind in the array.  */
  if (backward)
    {
      if (delta > 0)
	return;

      /* Transform the data so that we may assume that the accesses
	 are forward.  */
      delta = - delta;
      step = -step;
      delta_r = PREFETCH_BLOCK - 1 - delta_r;
      delta_b = PREFETCH_BLOCK - 1 - delta_b;
    }
  else
    {
      if (delta < 0)
	return;
    }

  /* Check whether the two references are likely to hit the same cache
     line, and how distant the iterations in which it occurs are from
     each other.  */

  if (step <= PREFETCH_BLOCK)
    {
      /* The accesses are sure to meet.  Let us check when.  */
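      /* For instance, for references (1) and (2) from the example in the
	 header comment (step 1, deltas 0 and 64, 64-byte PREFETCH_BLOCK),
	 HIT_FROM is 64 and PREFETCH_BEFORE of (1) becomes 64.  */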
      hit_from = ddown (delta_b, PREFETCH_BLOCK) * PREFETCH_BLOCK;
      prefetch_before = (hit_from - delta_r + step - 1) / step;

      if (prefetch_before < ref->prefetch_before)
	ref->prefetch_before = prefetch_before;

      return;
    }

  /* A more complicated case.  First let us ensure that the size of the cache
     line and the step are coprime (here we assume that PREFETCH_BLOCK is a
     power of two).  */
  prefetch_block = PREFETCH_BLOCK;
  while ((step & 1) == 0
	 && prefetch_block > 1)
    {
      step >>= 1;
      prefetch_block >>= 1;
      delta >>= 1;
    }

  /* Now step > prefetch_block, and step and prefetch_block are coprime.
     Determine the probability that the accesses hit the same cache line.  */

  prefetch_before = delta / step;
  delta %= step;
  if ((unsigned HOST_WIDE_INT) delta
      <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000))
    {
      if (prefetch_before < ref->prefetch_before)
	ref->prefetch_before = prefetch_before;

      return;
    }

  /* Try also the following iteration.  */
  prefetch_before++;
  delta = step - delta;
  if ((unsigned HOST_WIDE_INT) delta
      <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000))
    {
      if (prefetch_before < ref->prefetch_before)
	ref->prefetch_before = prefetch_before;

      return;
    }

  /* REF probably does not reuse BY.  */
  return;
}

/* Prune the prefetch candidate REF using the reuses with other references
   in REFS.  */

static void
prune_ref_by_reuse (struct mem_ref *ref, struct mem_ref *refs)
{
  struct mem_ref *prune_by;
  bool before = true;

  prune_ref_by_self_reuse (ref);

  for (prune_by = refs; prune_by; prune_by = prune_by->next)
    {
      if (prune_by == ref)
	{
	  before = false;
	  continue;
	}

      if (!WRITE_CAN_USE_READ_PREFETCH
	  && ref->write_p
	  && !prune_by->write_p)
	continue;
      if (!READ_CAN_USE_WRITE_PREFETCH
	  && !ref->write_p
	  && prune_by->write_p)
	continue;

      prune_ref_by_group_reuse (ref, prune_by, before);
    }
}

/* Prune the prefetch candidates in GROUP using the reuse analysis.  */

static void
prune_group_by_reuse (struct mem_ref_group *group)
{
  struct mem_ref *ref_pruned;

  for (ref_pruned = group->refs; ref_pruned; ref_pruned = ref_pruned->next)
    {
      prune_ref_by_reuse (ref_pruned, group->refs);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Reference %p:", (void *) ref_pruned);

	  if (ref_pruned->prefetch_before == PREFETCH_ALL
	      && ref_pruned->prefetch_mod == 1)
	    fprintf (dump_file, " no restrictions");
	  else if (ref_pruned->prefetch_before == 0)
	    fprintf (dump_file, " do not prefetch");
	  else if (ref_pruned->prefetch_before <= ref_pruned->prefetch_mod)
	    fprintf (dump_file, " prefetch once");
	  else
	    {
	      if (ref_pruned->prefetch_before != PREFETCH_ALL)
		{
		  fprintf (dump_file, " prefetch before ");
		  fprintf (dump_file, HOST_WIDE_INT_PRINT_DEC,
			   ref_pruned->prefetch_before);
		}
	      if (ref_pruned->prefetch_mod != 1)
		{
		  fprintf (dump_file, " prefetch mod ");
		  fprintf (dump_file, HOST_WIDE_INT_PRINT_DEC,
			   ref_pruned->prefetch_mod);
		}
	    }
	  fprintf (dump_file, "\n");
	}
    }
}

/* Prune the list of prefetch candidates GROUPS using the reuse analysis.  */

static void
prune_by_reuse (struct mem_ref_group *groups)
{
  for (; groups; groups = groups->next)
    prune_group_by_reuse (groups);
}

/* Returns true if we should issue a prefetch for REF.  */

static bool
should_issue_prefetch_p (struct mem_ref *ref)
{
  /* For now do not issue prefetches for references that would only be
     prefetched in the first few iterations.  */
  if (ref->prefetch_before != PREFETCH_ALL)
    return false;

  return true;
}

/* Decide which of the prefetch candidates in GROUPS to prefetch.
   AHEAD is the number of iterations to prefetch ahead (which corresponds
   to the number of simultaneous instances of one prefetch running at a
   time).  UNROLL_FACTOR is the factor by which the loop is going to be
   unrolled.  Returns true if there is anything to prefetch.  */

static bool
schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
		     unsigned ahead)
{
  unsigned max_prefetches, n_prefetches;
  struct mem_ref *ref;
  bool any = false;

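  /* For instance, with the default SIMULTANEOUS_PREFETCHES of 3, an unroll
     factor of 4 and AHEAD of 4, this allows at most (3 * 4) / 4 = 3
     prefetches to be issued in the loop body.  */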
  max_prefetches = (SIMULTANEOUS_PREFETCHES * unroll_factor) / ahead;
  if (max_prefetches > (unsigned) SIMULTANEOUS_PREFETCHES)
    max_prefetches = SIMULTANEOUS_PREFETCHES;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Max prefetches to issue: %d.\n", max_prefetches);

  if (!max_prefetches)
    return false;

  /* For now we just take memory references one by one and issue
     prefetches for as many as possible.  The groups are sorted
     starting with the largest step, since the references with
     large step are more likely to cause many cache misses.  */

  for (; groups; groups = groups->next)
    for (ref = groups->refs; ref; ref = ref->next)
      {
	if (!should_issue_prefetch_p (ref))
	  continue;

	ref->issue_prefetch_p = true;

	/* If prefetch_mod is less than unroll_factor, we need to insert
	   several prefetches for the reference.  */
	n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
			/ ref->prefetch_mod);
	if (max_prefetches <= n_prefetches)
	  return true;

	max_prefetches -= n_prefetches;
	any = true;
      }

  return any;
}

/* Determine whether there is any reference suitable for prefetching
   in GROUPS.  */

static bool
anything_to_prefetch_p (struct mem_ref_group *groups)
{
  struct mem_ref *ref;

  for (; groups; groups = groups->next)
    for (ref = groups->refs; ref; ref = ref->next)
      if (should_issue_prefetch_p (ref))
	return true;

  return false;
}

/* Issue prefetches for the reference REF into the loop as decided before.
   AHEAD is the number of iterations to prefetch ahead.  UNROLL_FACTOR
   is the factor by which LOOP was unrolled.  */

static void
issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
{
  HOST_WIDE_INT delta;
  tree addr, addr_base, prefetch, params, write_p;
  block_stmt_iterator bsi;
  unsigned n_prefetches, ap;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Issued prefetch for %p.\n", (void *) ref);

  bsi = bsi_for_stmt (ref->stmt);

  n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
		  / ref->prefetch_mod);
  addr_base = build_fold_addr_expr_with_type (ref->mem, ptr_type_node);
  addr_base = force_gimple_operand_bsi (&bsi, unshare_expr (addr_base), true, NULL);

  for (ap = 0; ap < n_prefetches; ap++)
    {
      /* Determine the address to prefetch.  */
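      /* E.g. for a group with step 16, PREFETCH_MOD 2, AHEAD 4 and ap 1,
	 we prefetch ADDR_BASE + (4 + 1 * 2) * 16 = ADDR_BASE + 96 bytes.  */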
      delta = (ahead + ap * ref->prefetch_mod) * ref->group->step;
      addr = fold_build2 (PLUS_EXPR, ptr_type_node,
			  addr_base, build_int_cst (ptr_type_node, delta));
      addr = force_gimple_operand_bsi (&bsi, unshare_expr (addr), true, NULL);

      /* Create the prefetch instruction.  */
      write_p = ref->write_p ? integer_one_node : integer_zero_node;
      params = tree_cons (NULL_TREE, addr,
			  tree_cons (NULL_TREE, write_p, NULL_TREE));

      prefetch = build_function_call_expr (built_in_decls[BUILT_IN_PREFETCH],
					   params);
      bsi_insert_before (&bsi, prefetch, BSI_SAME_STMT);
    }
}

/* Issue prefetches for the references in GROUPS into the loop as decided
   before.  AHEAD is the number of iterations to prefetch ahead.
   UNROLL_FACTOR is the factor by which LOOP was unrolled.  */

static void
issue_prefetches (struct mem_ref_group *groups,
		  unsigned unroll_factor, unsigned ahead)
{
  struct mem_ref *ref;

  for (; groups; groups = groups->next)
    for (ref = groups->refs; ref; ref = ref->next)
      if (ref->issue_prefetch_p)
	issue_prefetch_ref (ref, unroll_factor, ahead);
}

/* Determines whether we can profitably unroll LOOP FACTOR times, and if
   this is the case, fills in DESC with the description of the number of
   iterations.  */

static bool
should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc,
		      unsigned factor)
{
  if (!can_unroll_loop_p (loop, factor, desc))
    return false;

  /* We only consider loops without control flow for unrolling.  This is not
     a hard restriction -- tree_unroll_loop works with arbitrary loops
     as well; but the unrolling/prefetching is usually more profitable for
     loops consisting of a single basic block, and we want to limit the
     code growth.  */
  if (loop->num_nodes > 2)
    return false;

  return true;
}

/* Determine the factor by which to unroll LOOP, from the information
   contained in the list of memory references REFS.  The description of the
   number of iterations of LOOP is stored to DESC.  AHEAD is the number
   of iterations ahead that we need to prefetch.  NINSNS is the number of
   insns of LOOP.  */

static unsigned
determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
			 unsigned ahead, unsigned ninsns,
			 struct tree_niter_desc *desc)
{
  unsigned upper_bound, size_factor, constraint_factor;
  unsigned factor, max_mod_constraint, ahead_factor;
  struct mem_ref_group *agp;
  struct mem_ref *ref;

  upper_bound = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* First check whether the loop is not too large to unroll.  */
  size_factor = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
  if (size_factor <= 1)
    return 1;

  if (size_factor < upper_bound)
    upper_bound = size_factor;

  max_mod_constraint = 1;
  for (agp = refs; agp; agp = agp->next)
    for (ref = agp->refs; ref; ref = ref->next)
      if (should_issue_prefetch_p (ref)
	  && ref->prefetch_mod > max_mod_constraint)
	max_mod_constraint = ref->prefetch_mod;

  /* Set constraint_factor as large as needed to be able to satisfy the
     largest modulo constraint.  */
  constraint_factor = max_mod_constraint;

  /* If ahead is too large in comparison with the number of available
     prefetches, unroll the loop as much as needed to be able to prefetch
     at least partially some of the references in the loop.  */
  ahead_factor = ((ahead + SIMULTANEOUS_PREFETCHES - 1)
		  / SIMULTANEOUS_PREFETCHES);
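
  /* For instance, a reference with prefetch_mod 4 in a loop that needs to be
     prefetched 8 iterations ahead with SIMULTANEOUS_PREFETCHES 3 gives
     constraint_factor 4 and ahead_factor 3, so we aim for an unroll factor
     of 4 (subject to the size bound above).  */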

  /* Unroll as much as useful, but bound the code size growth.  */
  if (constraint_factor < ahead_factor)
    factor = ahead_factor;
  else
    factor = constraint_factor;
  if (factor > upper_bound)
    factor = upper_bound;

  if (!should_unroll_loop_p (loop, desc, factor))
    return 1;

  return factor;
}

/* Issue prefetch instructions for array references in LOOP.  Returns
   true if LOOP was unrolled.  LOOPS is the array containing all
   loops.  */

static bool
loop_prefetch_arrays (struct loops *loops, struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, unroll_factor;
  struct tree_niter_desc desc;
  bool unrolled = false;

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: We should not use the size of the loop, but rather the average
     number of instructions executed per iteration of the loop.  */
  ninsns = tree_num_loop_insns (loop);
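  /* E.g. with the default PREFETCH_LATENCY of 200 and a loop body of 50
     insns, we prefetch (200 + 50 - 1) / 50 = 4 iterations ahead.  */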
  ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
  unroll_factor = determine_unroll_factor (loop, refs, ahead, ninsns,
					   &desc);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* If the loop rolls less than the required unroll factor, prefetching
     is useless.  */
  if (unroll_factor > 1
      && cst_and_fits_in_hwi (desc.niter)
      && (unsigned HOST_WIDE_INT) int_cst_value (desc.niter) < unroll_factor)
    goto fail;

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loops, loop, unroll_factor,
			single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}

/* Issue prefetch instructions for array references in LOOPS.  */

unsigned int
tree_ssa_prefetch_arrays (struct loops *loops)
{
  unsigned i;
  struct loop *loop;
  bool unrolled = false;
  int todo_flags = 0;

  if (!HAVE_prefetch
      /* It is possible to ask the compiler for, say, -mtune=i486
	 -march=pentium4.  -mtune=i486 causes us to have PREFETCH_BLOCK 0,
	 since this is part of the processor costs, and i486 does not have
	 prefetch, but -march=pentium4 causes HAVE_prefetch to be true.
	 Ugh.  */
      || PREFETCH_BLOCK == 0)
    return 0;

  initialize_original_copy_tables ();

  if (!built_in_decls[BUILT_IN_PREFETCH])
    {
      tree type = build_function_type (void_type_node,
				       tree_cons (NULL_TREE,
						  const_ptr_type_node,
						  NULL_TREE));
      tree decl = lang_hooks.builtin_function ("__builtin_prefetch", type,
			BUILT_IN_PREFETCH, BUILT_IN_NORMAL,
			NULL, NULL_TREE);
      DECL_IS_NOVOPS (decl) = true;
      built_in_decls[BUILT_IN_PREFETCH] = decl;
    }

  /* We assume that the size of the cache line is a power of two, so verify
     this here.  */
  gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0);

  for (i = loops->num - 1; i > 0; i--)
    {
      loop = loops->parray[i];

      if (dump_file && (dump_flags & TDF_DETAILS) && loop)
	fprintf (dump_file, "Processing loop %d:\n", loop->num);

      if (loop)
	unrolled |= loop_prefetch_arrays (loops, loop);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\n\n");
    }

  if (unrolled)
    {
      scev_reset ();
      todo_flags |= TODO_cleanup_cfg;
    }

  free_original_copy_tables ();
  return todo_flags;
}