1/* Interchange heuristics and transform for loop interchange on
2   polyhedral representation.
3
4   Copyright (C) 2009-2015 Free Software Foundation, Inc.
5   Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6   Harsha Jagasia <harsha.jagasia@amd.com>.
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3, or (at your option)
13any later version.
14
15GCC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3.  If not see
22<http://www.gnu.org/licenses/>.  */
23
24#include "config.h"
25
26#ifdef HAVE_isl
27#include <isl/constraint.h>
28#include <isl/aff.h>
29#include <isl/set.h>
30#include <isl/map.h>
31#include <isl/union_map.h>
32#include <isl/ilp.h>
33#include <isl/val.h>
34
35/* Since ISL-0.13, the extern is in val_gmp.h.  */
36#if !defined(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE) && defined(__cplusplus)
37extern "C" {
38#endif
39#include <isl/val_gmp.h>
40#if !defined(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE) && defined(__cplusplus)
41}
42#endif
43#endif
44
45#include "system.h"
46#include "coretypes.h"
47#include "hash-set.h"
48#include "machmode.h"
49#include "vec.h"
50#include "double-int.h"
51#include "input.h"
52#include "alias.h"
53#include "symtab.h"
54#include "options.h"
55#include "wide-int.h"
56#include "inchash.h"
57#include "tree.h"
58#include "fold-const.h"
59#include "predict.h"
60#include "tm.h"
61#include "hard-reg-set.h"
62#include "input.h"
63#include "function.h"
64#include "dominance.h"
65#include "cfg.h"
66#include "basic-block.h"
67#include "tree-ssa-alias.h"
68#include "internal-fn.h"
69#include "gimple-expr.h"
70#include "is-a.h"
71#include "gimple.h"
72#include "gimple-iterator.h"
73#include "tree-ssa-loop.h"
74#include "dumpfile.h"
75#include "cfgloop.h"
76#include "tree-chrec.h"
77#include "tree-data-ref.h"
78#include "tree-scalar-evolution.h"
79#include "sese.h"
80
81#ifdef HAVE_isl
82#include "graphite-poly.h"
83
84/* XXX isl rewrite following comment */
85/* Builds a linear expression, of dimension DIM, representing PDR's
86   memory access:
87
88   L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
89
90   For an array A[10][20] with two subscript locations s0 and s1, the
91   linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
92   corresponds to a memory stride of 20.
93
94   OFFSET is a number of dimensions to prepend before the
95   subscript dimensions: s_0, s_1, ..., s_n.
96
97   Thus, the final linear expression has the following format:
98   0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
99   where the expression itself is:
100   c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
101
102static isl_constraint *
103build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
104{
105  isl_constraint *res;
106  isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
107  unsigned offset, nsubs;
108  int i;
109  isl_ctx *ctx;
110
111  isl_val *size, *subsize, *size1;
112
113  res = isl_equality_alloc (ls);
114  ctx = isl_local_space_get_ctx (ls);
115  size = isl_val_int_from_ui (ctx, 1);
116
117  nsubs = isl_set_dim (pdr->extent, isl_dim_set);
118  /* -1 for the already included L dimension.  */
119  offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
120  res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
121  /* Go through all subscripts from last to first.  First dimension
122     is the alias set, ignore it.  */
123  for (i = nsubs - 1; i >= 1; i--)
124    {
125      isl_space *dc;
126      isl_aff *aff;
127
128      size1 = isl_val_copy (size);
129      res = isl_constraint_set_coefficient_val (res, isl_dim_out, offset + i, size);
130      dc = isl_set_get_space (pdr->extent);
131      aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
132      aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
133      subsize = isl_set_max_val (pdr->extent, aff);
134      isl_aff_free (aff);
135      size = isl_val_mul (size1, subsize);
136    }
137
138  isl_val_free (size);
139
140  return res;
141}
142
143/* Set STRIDE to the stride of PDR in memory by advancing by one in
144   the loop at DEPTH.  */
145
146static void
147pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
148{
149  poly_bb_p pbb = PDR_PBB (pdr);
150  isl_map *map;
151  isl_set *set;
152  isl_aff *aff;
153  isl_space *dc;
154  isl_constraint *lma, *c;
155  isl_val *islstride;
156  graphite_dim_t time_depth;
157  unsigned offset, nt;
158  unsigned i;
159  /* XXX isl rewrite following comments.  */
160  /* Builds a partial difference equations and inserts them
161     into pointset powerset polyhedron P.  Polyhedron is assumed
162     to have the format: T|I|T'|I'|G|S|S'|l1|l2.
163
164     TIME_DEPTH is the time dimension w.r.t. which we are
165     differentiating.
166     OFFSET represents the number of dimensions between
167     columns t_{time_depth} and t'_{time_depth}.
168     DIM_SCTR is the number of scattering dimensions.  It is
169     essentially the dimensionality of the T vector.
170
171     The following equations are inserted into the polyhedron P:
172     | t_1 = t_1'
173     | ...
174     | t_{time_depth-1} = t'_{time_depth-1}
175     | t_{time_depth} = t'_{time_depth} + 1
176     | t_{time_depth+1} = t'_{time_depth + 1}
177     | ...
178     | t_{dim_sctr} = t'_{dim_sctr}.  */
179
180  /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
181     This is the core part of this alogrithm, since this
182     constraint asks for the memory access stride (difference)
183     between two consecutive points in time dimensions.  */
184
185  /* Add equalities:
186     | t1 = t1'
187     | ...
188     | t_{time_depth-1} = t'_{time_depth-1}
189     | t_{time_depth+1} = t'_{time_depth+1}
190     | ...
191     | t_{dim_sctr} = t'_{dim_sctr}
192
193     This means that all the time dimensions are equal except for
194     time_depth, where the constraint is t_{depth} = t'_{depth} + 1
195     step.  More to this: we should be careful not to add equalities
196     to the 'coupled' dimensions, which happens when the one dimension
197     is stripmined dimension, and the other dimension corresponds
198     to the point loop inside stripmined dimension.  */
199
200  /* pdr->accesses:    [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
201          ??? [P] not used for PDRs?
202     pdr->extent:      [a,S1..nb_subscript]
203     pbb->domain:      [P1..nb_param,I1..nb_domain]
204     pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
205          [T] includes local vars (currently unused)
206
207     First we create [P,I] -> [T,a,S].  */
208
209  map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
210				    isl_map_copy (pdr->accesses));
211  /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
212  map = isl_map_add_dims (map, isl_dim_out, 1);
213  /* Build a constraint for "lma[S] - L == 0", effectively calculating
214     L in terms of subscripts.  */
215  lma = build_linearized_memory_access (map, pdr);
216  /* And add it to the map, so we now have:
217     [P,I] -> [T,a,S,L] : lma([S]) == L.  */
218  map = isl_map_add_constraint (map, lma);
219
220  /* Then we create  [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
221  map = isl_map_flat_product (map, isl_map_copy (map));
222
223  /* Now add the equality T[time_depth] == T'[time_depth]+1.  This will
224     force L' to be the linear address at T[time_depth] + 1. */
225  time_depth = psct_dynamic_dim (pbb, depth);
226  /* Length of [a,S] plus [L] ...  */
227  offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
228  /* ... plus [T].  */
229  offset += isl_map_dim (pbb->transformed, isl_dim_out);
230
231  c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
232  c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
233  c = isl_constraint_set_coefficient_si (c, isl_dim_out,
234					 offset + time_depth, -1);
235  c = isl_constraint_set_constant_si (c, 1);
236  map = isl_map_add_constraint (map, c);
237
238  /* Now we equate most of the T/T' elements (making PITaSL nearly
239     the same is (PITaSL)', except for one dimension, namely for 'depth'
240     (an index into [I]), after translating to index into [T].  Take care
241     to not produce an empty map, which indicates we wanted to equate
242     two dimensions that are already coupled via the above time_depth
243     dimension.  Happens with strip mining where several scatter dimension
244     are interdependend.  */
245  /* Length of [T].  */
246  nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
247  for (i = 0; i < nt; i++)
248    if (i != time_depth)
249      {
250	isl_map *temp = isl_map_equate (isl_map_copy (map),
251					isl_dim_out, i,
252					isl_dim_out, offset + i);
253	if (isl_map_is_empty (temp))
254	  isl_map_free (temp);
255	else
256	  {
257	    isl_map_free (map);
258	    map = temp;
259	  }
260      }
261
262  /* Now maximize the expression L' - L.  */
263  set = isl_map_range (map);
264  dc = isl_set_get_space (set);
265  aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
266  aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
267  aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
268  islstride = isl_set_max_val (set, aff);
269  isl_val_get_num_gmp (islstride, stride);
270  isl_val_free (islstride);
271  isl_aff_free (aff);
272  isl_set_free (set);
273
274  if (dump_file && (dump_flags & TDF_DETAILS))
275    {
276      gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:  %Zd ",
277		   pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
278    }
279}
280
281/* Sets STRIDES to the sum of all the strides of the data references
282   accessed in LOOP at DEPTH.  */
283
284static void
285memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
286{
287  int i, j;
288  lst_p l;
289  poly_dr_p pdr;
290  mpz_t s, n;
291
292  mpz_init (s);
293  mpz_init (n);
294
295  FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
296    if (LST_LOOP_P (l))
297      memory_strides_in_loop_1 (l, depth, strides);
298    else
299      FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
300	{
301	  pdr_stride_in_loop (s, depth, pdr);
302	  mpz_set_si (n, PDR_NB_REFS (pdr));
303	  mpz_mul (s, s, n);
304	  mpz_add (strides, strides, s);
305	}
306
307  mpz_clear (s);
308  mpz_clear (n);
309}
310
311/* Sets STRIDES to the sum of all the strides of the data references
312   accessed in LOOP at DEPTH.  */
313
314static void
315memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
316{
317  if (mpz_cmp_si (loop->memory_strides, -1) == 0)
318    {
319      mpz_set_si (strides, 0);
320      memory_strides_in_loop_1 (loop, depth, strides);
321    }
322  else
323    mpz_set (strides, loop->memory_strides);
324}
325
326/* Return true when the interchange of loops LOOP1 and LOOP2 is
327   profitable.
328
329   Example:
330
331   | int a[100][100];
332   |
333   | int
334   | foo (int N)
335   | {
336   |   int j;
337   |   int i;
338   |
339   |   for (i = 0; i < N; i++)
340   |     for (j = 0; j < N; j++)
341   |       a[j][2 * i] += 1;
342   |
343   |   return a[N][12];
344   | }
345
346   The data access A[j][i] is described like this:
347
348   | i   j   N   a  s0  s1   1
349   | 0   0   0   1   0   0  -5    = 0
350   | 0  -1   0   0   1   0   0    = 0
351   |-2   0   0   0   0   1   0    = 0
352   | 0   0   0   0   1   0   0   >= 0
353   | 0   0   0   0   0   1   0   >= 0
354   | 0   0   0   0  -1   0 100   >= 0
355   | 0   0   0   0   0  -1 100   >= 0
356
357   The linearized memory access L to A[100][100] is:
358
359   | i   j   N   a  s0  s1   1
360   | 0   0   0   0 100   1   0
361
362   TODO: the shown format is not valid as it does not show the fact
363   that the iteration domain "i j" is transformed using the scattering.
364
365   Next, to measure the impact of iterating once in loop "i", we build
366   a maximization problem: first, we add to DR accesses the dimensions
367   k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
368   L1 and L2 are the linearized memory access functions.
369
370   | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
371   | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
372   | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
373   |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
374   | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
375   | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
376   | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
377   | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
378   | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
379
380   Then, we generate the polyhedron P2 by interchanging the dimensions
381   (s0, s2), (s1, s3), (L1, L2), (k, i)
382
383   | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
384   | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
385   | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
386   | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
387   | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
388   | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
389   | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
390   | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
391   | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
392
393   then we add to P2 the equality k = i + 1:
394
395   |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
396
397   and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
398
399   Similarly, to determine the impact of one iteration on loop "j", we
400   interchange (k, j), we add "k = j + 1", and we compute D2 the
401   maximal value of the difference.
402
403   Finally, the profitability test is D1 < D2: if in the outer loop
404   the strides are smaller than in the inner loop, then it is
405   profitable to interchange the loops at DEPTH1 and DEPTH2.  */
406
407static bool
408lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
409{
410  mpz_t d1, d2;
411  bool res;
412
413  gcc_assert (depth1 < depth2);
414
415  mpz_init (d1);
416  mpz_init (d2);
417
418  memory_strides_in_loop (nest, depth1, d1);
419  memory_strides_in_loop (nest, depth2, d2);
420
421  res = mpz_cmp (d1, d2) < 0;
422
423  mpz_clear (d1);
424  mpz_clear (d2);
425
426  return res;
427}
428
429/* Interchanges the loops at DEPTH1 and DEPTH2 of the original
430   scattering and assigns the resulting polyhedron to the transformed
431   scattering.  */
432
433static void
434pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
435			     poly_bb_p pbb)
436{
437  unsigned i;
438  unsigned dim1 = psct_dynamic_dim (pbb, depth1);
439  unsigned dim2 = psct_dynamic_dim (pbb, depth2);
440  isl_space *d = isl_map_get_space (pbb->transformed);
441  isl_space *d1 = isl_space_range (d);
442  unsigned n = isl_space_dim (d1, isl_dim_out);
443  isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
444  isl_map *x = isl_map_universe (d2);
445
446  x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
447  x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
448
449  for (i = 0; i < n; i++)
450    if (i != dim1 && i != dim2)
451      x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
452
453  pbb->transformed = isl_map_apply_range (pbb->transformed, x);
454}
455
456/* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
457   the statements below LST.  */
458
459static void
460lst_apply_interchange (lst_p lst, int depth1, int depth2)
461{
462  if (!lst)
463    return;
464
465  if (LST_LOOP_P (lst))
466    {
467      int i;
468      lst_p l;
469
470      FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
471	lst_apply_interchange (l, depth1, depth2);
472    }
473  else
474    pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
475}
476
477/* Return true when the nest starting at LOOP1 and ending on LOOP2 is
478   perfect: i.e. there are no sequence of statements.  */
479
480static bool
481lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
482{
483  if (loop1 == loop2)
484    return true;
485
486  if (!LST_LOOP_P (loop1))
487    return false;
488
489  return LST_SEQ (loop1).length () == 1
490         && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
491}
492
493/* Transform the loop nest between LOOP1 and LOOP2 into a perfect
494   nest.  To continue the naming tradition, this function is called
495   after perfect_nestify.  NEST is set to the perfectly nested loop
496   that is created.  BEFORE/AFTER are set to the loops distributed
497   before/after the loop NEST.  */
498
499static void
500lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
501		     lst_p *nest, lst_p *after)
502{
503  poly_bb_p first, last;
504
505  gcc_assert (loop1 && loop2
506	      && loop1 != loop2
507	      && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
508
509  first = LST_PBB (lst_find_first_pbb (loop2));
510  last = LST_PBB (lst_find_last_pbb (loop2));
511
512  *before = copy_lst (loop1);
513  *nest = copy_lst (loop1);
514  *after = copy_lst (loop1);
515
516  lst_remove_all_before_including_pbb (*before, first, false);
517  lst_remove_all_before_including_pbb (*after, last, true);
518
519  lst_remove_all_before_excluding_pbb (*nest, first, true);
520  lst_remove_all_before_excluding_pbb (*nest, last, false);
521
522  if (lst_empty_p (*before))
523    {
524      free_lst (*before);
525      *before = NULL;
526    }
527  if (lst_empty_p (*after))
528    {
529      free_lst (*after);
530      *after = NULL;
531    }
532  if (lst_empty_p (*nest))
533    {
534      free_lst (*nest);
535      *nest = NULL;
536    }
537}
538
539/* Try to interchange LOOP1 with LOOP2 for all the statements of the
540   body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
541   interchange.  */
542
543static bool
544lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
545{
546  int depth1 = lst_depth (loop1);
547  int depth2 = lst_depth (loop2);
548  lst_p transformed;
549
550  lst_p before = NULL, nest = NULL, after = NULL;
551
552  if (!lst_perfectly_nested_p (loop1, loop2))
553    lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
554
555  if (!lst_interchange_profitable_p (loop2, depth1, depth2))
556    return false;
557
558  lst_apply_interchange (loop2, depth1, depth2);
559
560  /* Sync the transformed LST information and the PBB scatterings
561     before using the scatterings in the data dependence analysis.  */
562  if (before || nest || after)
563    {
564      transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
565				      before, nest, after);
566      lst_update_scattering (transformed);
567      free_lst (transformed);
568    }
569
570  if (graphite_legal_transform (scop))
571    {
572      if (dump_file && (dump_flags & TDF_DETAILS))
573	fprintf (dump_file,
574		 "Loops at depths %d and %d will be interchanged.\n",
575		 depth1, depth2);
576
577      /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP.  */
578      lst_insert_in_sequence (before, loop1, true);
579      lst_insert_in_sequence (after, loop1, false);
580
581      if (nest)
582	{
583	  lst_replace (loop1, nest);
584	  free_lst (loop1);
585	}
586
587      return true;
588    }
589
590  /* Undo the transform.  */
591  free_lst (before);
592  free_lst (nest);
593  free_lst (after);
594  lst_apply_interchange (loop2, depth2, depth1);
595  return false;
596}
597
598/* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
599   with the loop OUTER in LST_SEQ (OUTER_FATHER).  */
600
601static bool
602lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
603			      lst_p inner_father)
604{
605  int inner;
606  lst_p loop1, loop2;
607
608  gcc_assert (outer_father
609	      && LST_LOOP_P (outer_father)
610	      && LST_LOOP_P (LST_SEQ (outer_father)[outer])
611	      && inner_father
612	      && LST_LOOP_P (inner_father));
613
614  loop1 = LST_SEQ (outer_father)[outer];
615
616  FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
617    if (LST_LOOP_P (loop2)
618	&& (lst_try_interchange_loops (scop, loop1, loop2)
619	    || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
620      return true;
621
622  return false;
623}
624
625/* Interchanges all the loops of LOOP and the loops of its body that
626   are considered profitable to interchange.  Return the number of
627   interchanged loops.  OUTER is the index in LST_SEQ (LOOP) that
628   points to the next outer loop to be considered for interchange.  */
629
630static int
631lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
632{
633  lst_p l;
634  int res = 0;
635  int i = 0;
636  lst_p father;
637
638  if (!loop || !LST_LOOP_P (loop))
639    return 0;
640
641  father = LST_LOOP_FATHER (loop);
642  if (father)
643    {
644      while (lst_interchange_select_inner (scop, father, outer, loop))
645	{
646	  res++;
647	  loop = LST_SEQ (father)[outer];
648	}
649    }
650
651  if (LST_LOOP_P (loop))
652    FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
653      if (LST_LOOP_P (l))
654	res += lst_interchange_select_outer (scop, l, i);
655
656  return res;
657}
658
659/* Interchanges all the loop depths that are considered profitable for
660   SCOP.  Return the number of interchanged loops.  */
661
662int
663scop_do_interchange (scop_p scop)
664{
665  int res = lst_interchange_select_outer
666    (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
667
668  lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
669
670  return res;
671}
672
673
674#endif
675
676