1169689Skan/* Swing Modulo Scheduling implementation.
2169689Skan   Copyright (C) 2004, 2005, 2006
3169689Skan   Free Software Foundation, Inc.
4169689Skan   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5169689Skan
6169689SkanThis file is part of GCC.
7169689Skan
8169689SkanGCC is free software; you can redistribute it and/or modify it under
9169689Skanthe terms of the GNU General Public License as published by the Free
10169689SkanSoftware Foundation; either version 2, or (at your option) any later
11169689Skanversion.
12169689Skan
13169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
14169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
15169689SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16169689Skanfor more details.
17169689Skan
18169689SkanYou should have received a copy of the GNU General Public License
19169689Skanalong with GCC; see the file COPYING.  If not, write to the Free
20169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21169689Skan02110-1301, USA.  */
22169689Skan
23169689Skan
24169689Skan#include "config.h"
25169689Skan#include "system.h"
26169689Skan#include "coretypes.h"
27169689Skan#include "tm.h"
28169689Skan#include "toplev.h"
29169689Skan#include "rtl.h"
30169689Skan#include "tm_p.h"
31169689Skan#include "hard-reg-set.h"
32169689Skan#include "regs.h"
33169689Skan#include "function.h"
34169689Skan#include "flags.h"
35169689Skan#include "insn-config.h"
36169689Skan#include "insn-attr.h"
37169689Skan#include "except.h"
38169689Skan#include "toplev.h"
39169689Skan#include "recog.h"
40169689Skan#include "sched-int.h"
41169689Skan#include "target.h"
42169689Skan#include "cfglayout.h"
43169689Skan#include "cfgloop.h"
44169689Skan#include "cfghooks.h"
45169689Skan#include "expr.h"
46169689Skan#include "params.h"
47169689Skan#include "gcov-io.h"
48169689Skan#include "df.h"
49169689Skan#include "ddg.h"
50169689Skan#include "timevar.h"
51169689Skan#include "tree-pass.h"
52169689Skan
53169689Skan#ifdef INSN_SCHEDULING
54169689Skan
55169689Skan/* This file contains the implementation of the Swing Modulo Scheduler,
56169689Skan   described in the following references:
57169689Skan   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
58169689Skan       Lifetime--sensitive modulo scheduling in a production environment.
59169689Skan       IEEE Trans. on Comps., 50(3), March 2001
60169689Skan   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61169689Skan       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62169689Skan       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63169689Skan
64169689Skan   The basic structure is:
65169689Skan   1. Build a data-dependence graph (DDG) for each loop.
66169689Skan   2. Use the DDG to order the insns of a loop (not in topological order
67169689Skan      necessarily, but rather) trying to place each insn after all its
68169689Skan      predecessors _or_ after all its successors.
69169689Skan   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70169689Skan   4. Use the ordering to perform list-scheduling of the loop:
71169689Skan      1. Set II = MII.  We will try to schedule the loop within II cycles.
72169689Skan      2. Try to schedule the insns one by one according to the ordering.
73169689Skan	 For each insn compute an interval of cycles by considering already-
74169689Skan	 scheduled preds and succs (and associated latencies); try to place
75169689Skan	 the insn in the cycles of this window checking for potential
76169689Skan	 resource conflicts (using the DFA interface).
77169689Skan	 Note: this is different from the cycle-scheduling of schedule_insns;
78169689Skan	 here the insns are not scheduled monotonically top-down (nor bottom-
79169689Skan	 up).
80169689Skan      3. If failed in scheduling all insns - bump II++ and try again, unless
81169689Skan	 II reaches an upper bound MaxII, in which case report failure.
82169689Skan   5. If we succeeded in scheduling the loop within II cycles, we now
83169689Skan      generate prolog and epilog, decrease the counter of the loop, and
84169689Skan      perform modulo variable expansion for live ranges that span more than
85169689Skan      II cycles (i.e. use register copies to prevent a def from overwriting
86169689Skan      itself before reaching the use).
87169689Skan*/
88169689Skan
89169689Skan
90169689Skan/* This page defines partial-schedule structures and functions for
91169689Skan   modulo scheduling.  */
92169689Skan
93169689Skantypedef struct partial_schedule *partial_schedule_ptr;
94169689Skantypedef struct ps_insn *ps_insn_ptr;
95169689Skan
96169689Skan/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
97169689Skan#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
98169689Skan
99169689Skan/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
100169689Skan#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
101169689Skan
102169689Skan/* Perform signed modulo, always returning a non-negative value.  */
103169689Skan#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
104169689Skan
105169689Skan/* The number of different iterations the nodes in ps span, assuming
106169689Skan   the stage boundaries are placed efficiently.  */
107169689Skan#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
108169689Skan			     + 1 + (ps)->ii - 1) / (ps)->ii)
109169689Skan
110169689Skan/* A single instruction in the partial schedule.  */
111169689Skanstruct ps_insn
112169689Skan{
113169689Skan  /* The corresponding DDG_NODE.  */
114169689Skan  ddg_node_ptr node;
115169689Skan
116169689Skan  /* The (absolute) cycle in which the PS instruction is scheduled.
117169689Skan     Same as SCHED_TIME (node).  */
118169689Skan  int cycle;
119169689Skan
120169689Skan  /* The next/prev PS_INSN in the same row.  */
121169689Skan  ps_insn_ptr next_in_row,
122169689Skan	      prev_in_row;
123169689Skan
124169689Skan  /* The number of nodes in the same row that come after this node.  */
125169689Skan  int row_rest_count;
126169689Skan};
127169689Skan
128169689Skan/* Holds the partial schedule as an array of II rows.  Each entry of the
129169689Skan   array points to a linked list of PS_INSNs, which represents the
130169689Skan   instructions that are scheduled for that row.  */
131169689Skanstruct partial_schedule
132169689Skan{
133169689Skan  int ii;	/* Number of rows in the partial schedule.  */
134169689Skan  int history;  /* Threshold for conflict checking using DFA.  */
135169689Skan
136169689Skan  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
137169689Skan  ps_insn_ptr *rows;
138169689Skan
139169689Skan  /* The earliest absolute cycle of an insn in the partial schedule.  */
140169689Skan  int min_cycle;
141169689Skan
142169689Skan  /* The latest absolute cycle of an insn in the partial schedule.  */
143169689Skan  int max_cycle;
144169689Skan
145169689Skan  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
146169689Skan};
147169689Skan
148169689Skan/* We use this to record all the register replacements we do in
149169689Skan   the kernel so we can undo SMS if it is not profitable.  */
150169689Skanstruct undo_replace_buff_elem
151169689Skan{
152169689Skan  rtx insn;
153169689Skan  rtx orig_reg;
154169689Skan  rtx new_reg;
155169689Skan  struct undo_replace_buff_elem *next;
156169689Skan};
157169689Skan
158169689Skan
159169689Skan
160169689Skanstatic partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
161169689Skanstatic void free_partial_schedule (partial_schedule_ptr);
162169689Skanstatic void reset_partial_schedule (partial_schedule_ptr, int new_ii);
163169689Skanvoid print_partial_schedule (partial_schedule_ptr, FILE *);
164169689Skanstatic int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
165169689Skanstatic ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
166169689Skan						ddg_node_ptr node, int cycle,
167169689Skan						sbitmap must_precede,
168169689Skan						sbitmap must_follow);
169169689Skanstatic void rotate_partial_schedule (partial_schedule_ptr, int);
170169689Skanvoid set_row_column_for_ps (partial_schedule_ptr);
171169689Skanstatic bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
172169689Skan
173169689Skan
174169689Skan/* This page defines constants and structures for the modulo scheduling
175169689Skan   driver.  */
176169689Skan
177169689Skan/* As in haifa-sched.c:  */
178169689Skan/* issue_rate is the number of insns that can be scheduled in the same
179169689Skan   machine cycle.  It can be defined in the config/mach/mach.h file,
180169689Skan   otherwise we set it to 1.  */
181169689Skan
182169689Skanstatic int issue_rate;
183169689Skan
184169689Skanstatic int sms_order_nodes (ddg_ptr, int, int * result);
185169689Skanstatic void set_node_sched_params (ddg_ptr);
186169689Skanstatic partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
187169689Skanstatic void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
188169689Skanstatic void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
189169689Skanstatic void duplicate_insns_of_cycles (partial_schedule_ptr ps,
190169689Skan				       int from_stage, int to_stage,
191169689Skan				       int is_prolog);
192169689Skan
193169689Skan#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
194169689Skan#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
195169689Skan#define SCHED_FIRST_REG_MOVE(x) \
196169689Skan	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
197169689Skan#define SCHED_NREG_MOVES(x) \
198169689Skan	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
199169689Skan#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
200169689Skan#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
201169689Skan#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
202169689Skan
203169689Skan/* The scheduling parameters held for each node.  */
204169689Skantypedef struct node_sched_params
205169689Skan{
206169689Skan  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
207169689Skan  int time;	/* The absolute scheduling cycle (time >= asap).  */
208169689Skan
209169689Skan  /* The following field (first_reg_move) is a pointer to the first
210169689Skan     register-move instruction added to handle the modulo-variable-expansion
211169689Skan     of the register defined by this node.  This register-move copies the
212169689Skan     original register defined by the node.  */
213169689Skan  rtx first_reg_move;
214169689Skan
215169689Skan  /* The number of register-move instructions added, immediately preceding
216169689Skan     first_reg_move.  */
217169689Skan  int nreg_moves;
218169689Skan
219169689Skan  int row;    /* Holds time % ii.  */
220169689Skan  int stage;  /* Holds time / ii.  */
221169689Skan
222169689Skan  /* The column of a node inside the ps.  If nodes u, v are on the same row,
223169689Skan     u will precede v if column (u) < column (v).  */
224169689Skan  int column;
225169689Skan} *node_sched_params_ptr;
226169689Skan
227169689Skan
228169689Skan/* The following three functions are copied from the current scheduler
229169689Skan   code in order to use sched_analyze() for computing the dependencies.
230169689Skan   They are used when initializing the sched_info structure.  */
231169689Skanstatic const char *
232169689Skansms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
233169689Skan{
234169689Skan  static char tmp[80];
235169689Skan
236169689Skan  sprintf (tmp, "i%4d", INSN_UID (insn));
237169689Skan  return tmp;
238169689Skan}
239169689Skan
240169689Skanstatic void
241169689Skancompute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
242169689Skan			       regset cond_exec ATTRIBUTE_UNUSED,
243169689Skan			       regset used ATTRIBUTE_UNUSED,
244169689Skan			       regset set ATTRIBUTE_UNUSED)
245169689Skan{
246169689Skan}
247169689Skan
248169689Skanstatic struct sched_info sms_sched_info =
249169689Skan{
250169689Skan  NULL,
251169689Skan  NULL,
252169689Skan  NULL,
253169689Skan  NULL,
254169689Skan  NULL,
255169689Skan  sms_print_insn,
256169689Skan  NULL,
257169689Skan  compute_jump_reg_dependencies,
258169689Skan  NULL, NULL,
259169689Skan  NULL, NULL,
260169689Skan  0, 0, 0,
261169689Skan
262169689Skan  NULL, NULL, NULL, NULL, NULL,
263169689Skan#ifdef ENABLE_CHECKING
264169689Skan  NULL,
265169689Skan#endif
266169689Skan  0
267169689Skan};
268169689Skan
269169689Skan
270169689Skan/* Return the register decremented and tested in INSN,
271169689Skan   or zero if it is not a decrement-and-branch insn.  */
272169689Skan
273169689Skanstatic rtx
274169689Skandoloop_register_get (rtx insn ATTRIBUTE_UNUSED)
275169689Skan{
276169689Skan#ifdef HAVE_doloop_end
277169689Skan  rtx pattern, reg, condition;
278169689Skan
279169689Skan  if (! JUMP_P (insn))
280169689Skan    return NULL_RTX;
281169689Skan
282169689Skan  pattern = PATTERN (insn);
283169689Skan  condition = doloop_condition_get (pattern);
284169689Skan  if (! condition)
285169689Skan    return NULL_RTX;
286169689Skan
287169689Skan  if (REG_P (XEXP (condition, 0)))
288169689Skan    reg = XEXP (condition, 0);
289169689Skan  else if (GET_CODE (XEXP (condition, 0)) == PLUS
290169689Skan	   && REG_P (XEXP (XEXP (condition, 0), 0)))
291169689Skan    reg = XEXP (XEXP (condition, 0), 0);
292169689Skan  else
293169689Skan    gcc_unreachable ();
294169689Skan
295169689Skan  return reg;
296169689Skan#else
297169689Skan  return NULL_RTX;
298169689Skan#endif
299169689Skan}
300169689Skan
301169689Skan/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
302169689Skan   that the number of iterations is a compile-time constant.  If so,
303169689Skan   return the rtx that sets COUNT_REG to a constant, and set COUNT to
304169689Skan   this constant.  Otherwise return 0.  */
305169689Skanstatic rtx
306169689Skanconst_iteration_count (rtx count_reg, basic_block pre_header,
307169689Skan		       HOST_WIDEST_INT * count)
308169689Skan{
309169689Skan  rtx insn;
310169689Skan  rtx head, tail;
311169689Skan
312169689Skan  if (! pre_header)
313169689Skan    return NULL_RTX;
314169689Skan
315169689Skan  get_ebb_head_tail (pre_header, pre_header, &head, &tail);
316169689Skan
317169689Skan  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
318169689Skan    if (INSN_P (insn) && single_set (insn) &&
319169689Skan	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
320169689Skan      {
321169689Skan	rtx pat = single_set (insn);
322169689Skan
323169689Skan	if (GET_CODE (SET_SRC (pat)) == CONST_INT)
324169689Skan	  {
325169689Skan	    *count = INTVAL (SET_SRC (pat));
326169689Skan	    return insn;
327169689Skan	  }
328169689Skan
329169689Skan	return NULL_RTX;
330169689Skan      }
331169689Skan
332169689Skan  return NULL_RTX;
333169689Skan}
334169689Skan
335169689Skan/* A very simple resource-based lower bound on the initiation interval.
336169689Skan   ??? Improve the accuracy of this bound by considering the
337169689Skan   utilization of various units.  */
338169689Skanstatic int
339169689Skanres_MII (ddg_ptr g)
340169689Skan{
341169689Skan  return (g->num_nodes / issue_rate);
342169689Skan}
343169689Skan
344169689Skan
345169689Skan/* Points to the array that contains the sched data for each node.  */
346169689Skanstatic node_sched_params_ptr node_sched_params;
347169689Skan
348169689Skan/* Allocate sched_params for each node and initialize it.  Assumes that
349169689Skan   the aux field of each node contain the asap bound (computed earlier),
350169689Skan   and copies it into the sched_params field.  */
351169689Skanstatic void
352169689Skanset_node_sched_params (ddg_ptr g)
353169689Skan{
354169689Skan  int i;
355169689Skan
356169689Skan  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
357169689Skan  /* Initialize ASAP/ALAP/HIGHT to zero.  */
358169689Skan  node_sched_params = (node_sched_params_ptr)
359169689Skan		       xcalloc (g->num_nodes,
360169689Skan				sizeof (struct node_sched_params));
361169689Skan
362169689Skan  /* Set the pointer of the general data of the node to point to the
363169689Skan     appropriate sched_params structure.  */
364169689Skan  for (i = 0; i < g->num_nodes; i++)
365169689Skan    {
366169689Skan      /* Watch out for aliasing problems?  */
367169689Skan      node_sched_params[i].asap = g->nodes[i].aux.count;
368169689Skan      g->nodes[i].aux.info = &node_sched_params[i];
369169689Skan    }
370169689Skan}
371169689Skan
372169689Skanstatic void
373169689Skanprint_node_sched_params (FILE * file, int num_nodes)
374169689Skan{
375169689Skan  int i;
376169689Skan
377169689Skan  if (! file)
378169689Skan    return;
379169689Skan  for (i = 0; i < num_nodes; i++)
380169689Skan    {
381169689Skan      node_sched_params_ptr nsp = &node_sched_params[i];
382169689Skan      rtx reg_move = nsp->first_reg_move;
383169689Skan      int j;
384169689Skan
385169689Skan      fprintf (file, "Node %d:\n", i);
386169689Skan      fprintf (file, " asap = %d:\n", nsp->asap);
387169689Skan      fprintf (file, " time = %d:\n", nsp->time);
388169689Skan      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
389169689Skan      for (j = 0; j < nsp->nreg_moves; j++)
390169689Skan	{
391169689Skan	  fprintf (file, " reg_move = ");
392169689Skan	  print_rtl_single (file, reg_move);
393169689Skan	  reg_move = PREV_INSN (reg_move);
394169689Skan	}
395169689Skan    }
396169689Skan}
397169689Skan
398169689Skan/* Calculate an upper bound for II.  SMS should not schedule the loop if it
399169689Skan   requires more cycles than this bound.  Currently set to the sum of the
400169689Skan   longest latency edge for each node.  Reset based on experiments.  */
401169689Skanstatic int
402169689Skancalculate_maxii (ddg_ptr g)
403169689Skan{
404169689Skan  int i;
405169689Skan  int maxii = 0;
406169689Skan
407169689Skan  for (i = 0; i < g->num_nodes; i++)
408169689Skan    {
409169689Skan      ddg_node_ptr u = &g->nodes[i];
410169689Skan      ddg_edge_ptr e;
411169689Skan      int max_edge_latency = 0;
412169689Skan
413169689Skan      for (e = u->out; e; e = e->next_out)
414169689Skan	max_edge_latency = MAX (max_edge_latency, e->latency);
415169689Skan
416169689Skan      maxii += max_edge_latency;
417169689Skan    }
418169689Skan  return maxii;
419169689Skan}
420169689Skan
421169689Skan/*
422169689Skan   Breaking intra-loop register anti-dependences:
423169689Skan   Each intra-loop register anti-dependence implies a cross-iteration true
424169689Skan   dependence of distance 1. Therefore, we can remove such false dependencies
425169689Skan   and figure out if the partial schedule broke them by checking if (for a
426169689Skan   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
427169689Skan   if so generate a register move.   The number of such moves is equal to:
428169689Skan              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
429169689Skan   nreg_moves = ----------------------------------- + 1 - {   dependence.
430169689Skan                            ii                          { 1 if not.
431169689Skan*/
432169689Skanstatic struct undo_replace_buff_elem *
433169689Skangenerate_reg_moves (partial_schedule_ptr ps)
434169689Skan{
435169689Skan  ddg_ptr g = ps->g;
436169689Skan  int ii = ps->ii;
437169689Skan  int i;
438169689Skan  struct undo_replace_buff_elem *reg_move_replaces = NULL;
439169689Skan
440169689Skan  for (i = 0; i < g->num_nodes; i++)
441169689Skan    {
442169689Skan      ddg_node_ptr u = &g->nodes[i];
443169689Skan      ddg_edge_ptr e;
444169689Skan      int nreg_moves = 0, i_reg_move;
445169689Skan      sbitmap *uses_of_defs;
446169689Skan      rtx last_reg_move;
447169689Skan      rtx prev_reg, old_reg;
448169689Skan
449169689Skan      /* Compute the number of reg_moves needed for u, by looking at life
450169689Skan	 ranges started at u (excluding self-loops).  */
451169689Skan      for (e = u->out; e; e = e->next_out)
452169689Skan	if (e->type == TRUE_DEP && e->dest != e->src)
453169689Skan	  {
454169689Skan	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
455169689Skan
456169689Skan            if (e->distance == 1)
457169689Skan              nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
458169689Skan
459169689Skan	    /* If dest precedes src in the schedule of the kernel, then dest
460169689Skan	       will read before src writes and we can save one reg_copy.  */
461169689Skan	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
462169689Skan		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
463169689Skan	      nreg_moves4e--;
464169689Skan
465169689Skan	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
466169689Skan	  }
467169689Skan
468169689Skan      if (nreg_moves == 0)
469169689Skan	continue;
470169689Skan
471169689Skan      /* Every use of the register defined by node may require a different
472169689Skan	 copy of this register, depending on the time the use is scheduled.
473169689Skan	 Set a bitmap vector, telling which nodes use each copy of this
474169689Skan	 register.  */
475169689Skan      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
476169689Skan      sbitmap_vector_zero (uses_of_defs, nreg_moves);
477169689Skan      for (e = u->out; e; e = e->next_out)
478169689Skan	if (e->type == TRUE_DEP && e->dest != e->src)
479169689Skan	  {
480169689Skan	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
481169689Skan
482169689Skan	    if (e->distance == 1)
483169689Skan	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
484169689Skan
485169689Skan	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
486169689Skan		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
487169689Skan	      dest_copy--;
488169689Skan
489169689Skan	    if (dest_copy)
490169689Skan	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
491169689Skan	  }
492169689Skan
493169689Skan      /* Now generate the reg_moves, attaching relevant uses to them.  */
494169689Skan      SCHED_NREG_MOVES (u) = nreg_moves;
495169689Skan      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
496169689Skan      last_reg_move = u->insn;
497169689Skan
498169689Skan      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
499169689Skan	{
500169689Skan	  unsigned int i_use = 0;
501169689Skan	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
502169689Skan	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
503169689Skan	  sbitmap_iterator sbi;
504169689Skan
505169689Skan	  add_insn_before (reg_move, last_reg_move);
506169689Skan	  last_reg_move = reg_move;
507169689Skan
508169689Skan	  if (!SCHED_FIRST_REG_MOVE (u))
509169689Skan	    SCHED_FIRST_REG_MOVE (u) = reg_move;
510169689Skan
511169689Skan	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
512169689Skan	    {
513169689Skan	      struct undo_replace_buff_elem *rep;
514169689Skan
515169689Skan	      rep = (struct undo_replace_buff_elem *)
516169689Skan		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
517169689Skan	      rep->insn = g->nodes[i_use].insn;
518169689Skan	      rep->orig_reg = old_reg;
519169689Skan	      rep->new_reg = new_reg;
520169689Skan
521169689Skan	      if (! reg_move_replaces)
522169689Skan		reg_move_replaces = rep;
523169689Skan	      else
524169689Skan		{
525169689Skan		  rep->next = reg_move_replaces;
526169689Skan		  reg_move_replaces = rep;
527169689Skan		}
528169689Skan
529169689Skan	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
530169689Skan	    }
531169689Skan
532169689Skan	  prev_reg = new_reg;
533169689Skan	}
534169689Skan      sbitmap_vector_free (uses_of_defs);
535169689Skan    }
536169689Skan  return reg_move_replaces;
537169689Skan}
538169689Skan
539169689Skan/* We call this when we want to undo the SMS schedule for a given loop.
540169689Skan   One of the things that we do is to delete the register moves generated
541169689Skan   for the sake of SMS; this function deletes the register move instructions
542169689Skan   recorded in the undo buffer.  */
543169689Skanstatic void
544169689Skanundo_generate_reg_moves (partial_schedule_ptr ps,
545169689Skan			 struct undo_replace_buff_elem *reg_move_replaces)
546169689Skan{
547169689Skan  int i,j;
548169689Skan
549169689Skan  for (i = 0; i < ps->g->num_nodes; i++)
550169689Skan    {
551169689Skan      ddg_node_ptr u = &ps->g->nodes[i];
552169689Skan      rtx prev;
553169689Skan      rtx crr = SCHED_FIRST_REG_MOVE (u);
554169689Skan
555169689Skan      for (j = 0; j < SCHED_NREG_MOVES (u); j++)
556169689Skan	{
557169689Skan	  prev = PREV_INSN (crr);
558169689Skan	  delete_insn (crr);
559169689Skan	  crr = prev;
560169689Skan	}
561169689Skan      SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
562169689Skan    }
563169689Skan
564169689Skan  while (reg_move_replaces)
565169689Skan    {
566169689Skan      struct undo_replace_buff_elem *rep = reg_move_replaces;
567169689Skan
568169689Skan      reg_move_replaces = reg_move_replaces->next;
569169689Skan      replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
570169689Skan    }
571169689Skan}
572169689Skan
573169689Skan/* Free memory allocated for the undo buffer.  */
574169689Skanstatic void
575169689Skanfree_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
576169689Skan{
577169689Skan
578169689Skan  while (reg_move_replaces)
579169689Skan    {
580169689Skan      struct undo_replace_buff_elem *rep = reg_move_replaces;
581169689Skan
582169689Skan      reg_move_replaces = reg_move_replaces->next;
583169689Skan      free (rep);
584169689Skan    }
585169689Skan}
586169689Skan
587169689Skan/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
588169689Skan   of SCHED_ROW and SCHED_STAGE.  */
589169689Skanstatic void
590169689Skannormalize_sched_times (partial_schedule_ptr ps)
591169689Skan{
592169689Skan  int i;
593169689Skan  ddg_ptr g = ps->g;
594169689Skan  int amount = PS_MIN_CYCLE (ps);
595169689Skan  int ii = ps->ii;
596169689Skan
597169689Skan  /* Don't include the closing branch assuming that it is the last node.  */
598169689Skan  for (i = 0; i < g->num_nodes - 1; i++)
599169689Skan    {
600169689Skan      ddg_node_ptr u = &g->nodes[i];
601169689Skan      int normalized_time = SCHED_TIME (u) - amount;
602169689Skan
603169689Skan      gcc_assert (normalized_time >= 0);
604169689Skan
605169689Skan      SCHED_TIME (u) = normalized_time;
606169689Skan      SCHED_ROW (u) = normalized_time % ii;
607169689Skan      SCHED_STAGE (u) = normalized_time / ii;
608169689Skan    }
609169689Skan}
610169689Skan
611169689Skan/* Set SCHED_COLUMN of each node according to its position in PS.  */
612169689Skanstatic void
613169689Skanset_columns_for_ps (partial_schedule_ptr ps)
614169689Skan{
615169689Skan  int row;
616169689Skan
617169689Skan  for (row = 0; row < ps->ii; row++)
618169689Skan    {
619169689Skan      ps_insn_ptr cur_insn = ps->rows[row];
620169689Skan      int column = 0;
621169689Skan
622169689Skan      for (; cur_insn; cur_insn = cur_insn->next_in_row)
623169689Skan	SCHED_COLUMN (cur_insn->node) = column++;
624169689Skan    }
625169689Skan}
626169689Skan
627169689Skan/* Permute the insns according to their order in PS, from row 0 to
628169689Skan   row ii-1, and position them right before LAST.  This schedules
629169689Skan   the insns of the loop kernel.  */
630169689Skanstatic void
631169689Skanpermute_partial_schedule (partial_schedule_ptr ps, rtx last)
632169689Skan{
633169689Skan  int ii = ps->ii;
634169689Skan  int row;
635169689Skan  ps_insn_ptr ps_ij;
636169689Skan
637169689Skan  for (row = 0; row < ii ; row++)
638169689Skan    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
639169689Skan      if (PREV_INSN (last) != ps_ij->node->insn)
640169689Skan      	reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
641169689Skan			    PREV_INSN (last));
642169689Skan}
643169689Skan
644169689Skan/* As part of undoing SMS we return to the original ordering of the
645169689Skan   instructions inside the loop kernel.  Given the partial schedule PS, this
646169689Skan   function returns the ordering of the instruction according to their CUID
647169689Skan   in the DDG (PS->G), which is the original order of the instruction before
648169689Skan   performing SMS.  */
649169689Skanstatic void
650169689Skanundo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
651169689Skan{
652169689Skan  int i;
653169689Skan
654169689Skan  for (i = 0 ; i < ps->g->num_nodes; i++)
655169689Skan    if (last == ps->g->nodes[i].insn
656169689Skan	|| last == ps->g->nodes[i].first_note)
657169689Skan      break;
658169689Skan    else if (PREV_INSN (last) != ps->g->nodes[i].insn)
659169689Skan      reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
660169689Skan			  PREV_INSN (last));
661169689Skan}
662169689Skan
663169689Skan/* Used to generate the prologue & epilogue.  Duplicate the subset of
664169689Skan   nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
665169689Skan   of both), together with a prefix/suffix of their reg_moves.  */
666169689Skanstatic void
667169689Skanduplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
668169689Skan			   int to_stage, int for_prolog)
669169689Skan{
670169689Skan  int row;
671169689Skan  ps_insn_ptr ps_ij;
672169689Skan
673169689Skan  for (row = 0; row < ps->ii; row++)
674169689Skan    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
675169689Skan      {
676169689Skan	ddg_node_ptr u_node = ps_ij->node;
677169689Skan	int j, i_reg_moves;
678169689Skan	rtx reg_move = NULL_RTX;
679169689Skan
680169689Skan	if (for_prolog)
681169689Skan	  {
682169689Skan	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
683169689Skan	       number of reg_moves starting with the second occurrence of
684169689Skan	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
685169689Skan	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
686169689Skan	    i_reg_moves = MAX (i_reg_moves, 0);
687169689Skan	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
688169689Skan
689169689Skan	    /* The reg_moves start from the *first* reg_move backwards.  */
690169689Skan	    if (i_reg_moves)
691169689Skan	      {
692169689Skan		reg_move = SCHED_FIRST_REG_MOVE (u_node);
693169689Skan		for (j = 1; j < i_reg_moves; j++)
694169689Skan		  reg_move = PREV_INSN (reg_move);
695169689Skan	      }
696169689Skan	  }
697169689Skan	else /* It's for the epilog.  */
698169689Skan	  {
699169689Skan	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
700169689Skan	       starting to decrease one stage after u_node no longer occurs;
701169689Skan	       that is, generate all reg_moves until
702169689Skan	       SCHED_STAGE (u_node) == from_stage - 1.  */
703169689Skan	    i_reg_moves = SCHED_NREG_MOVES (u_node)
704169689Skan	    	       - (from_stage - SCHED_STAGE (u_node) - 1);
705169689Skan	    i_reg_moves = MAX (i_reg_moves, 0);
706169689Skan	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
707169689Skan
708169689Skan	    /* The reg_moves start from the *last* reg_move forwards.  */
709169689Skan	    if (i_reg_moves)
710169689Skan	      {
711169689Skan		reg_move = SCHED_FIRST_REG_MOVE (u_node);
712169689Skan		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
713169689Skan		  reg_move = PREV_INSN (reg_move);
714169689Skan	      }
715169689Skan	  }
716169689Skan
717169689Skan	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
718169689Skan	  emit_insn (copy_rtx (PATTERN (reg_move)));
719169689Skan	if (SCHED_STAGE (u_node) >= from_stage
720169689Skan	    && SCHED_STAGE (u_node) <= to_stage)
721169689Skan	  duplicate_insn_chain (u_node->first_note, u_node->insn);
722169689Skan      }
723169689Skan}
724169689Skan
725169689Skan
726169689Skan/* Generate the instructions (including reg_moves) for prolog & epilog.  */
727169689Skanstatic void
728169689Skangenerate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
729169689Skan{
730169689Skan  int i;
731169689Skan  int last_stage = PS_STAGE_COUNT (ps) - 1;
732169689Skan  edge e;
733169689Skan
734169689Skan  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
735169689Skan  start_sequence ();
736169689Skan
737169689Skan  if (count_reg)
738169689Skan   /* Generate a subtract instruction at the beginning of the prolog to
739169689Skan      adjust the loop count by STAGE_COUNT.  */
740169689Skan   emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
741169689Skan
742169689Skan  for (i = 0; i < last_stage; i++)
743169689Skan    duplicate_insns_of_cycles (ps, 0, i, 1);
744169689Skan
745169689Skan  /* Put the prolog ,  on the one and only entry edge.  */
746169689Skan  e = loop_preheader_edge (loop);
747169689Skan  loop_split_edge_with(e , get_insns());
748169689Skan
749169689Skan  end_sequence ();
750169689Skan
751169689Skan  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
752169689Skan  start_sequence ();
753169689Skan
754169689Skan  for (i = 0; i < last_stage; i++)
755169689Skan    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
756169689Skan
757169689Skan  /* Put the epilogue on the one and only one exit edge.  */
758169689Skan  gcc_assert (loop->single_exit);
759169689Skan  e = loop->single_exit;
760169689Skan  loop_split_edge_with(e , get_insns());
761169689Skan  end_sequence ();
762169689Skan}
763169689Skan
764169689Skan/* Return the line note insn preceding INSN, for debugging.  Taken from
765169689Skan   emit-rtl.c.  */
766169689Skanstatic rtx
767169689Skanfind_line_note (rtx insn)
768169689Skan{
769169689Skan  for (; insn; insn = PREV_INSN (insn))
770169689Skan    if (NOTE_P (insn)
771169689Skan	&& NOTE_LINE_NUMBER (insn) >= 0)
772169689Skan      break;
773169689Skan
774169689Skan  return insn;
775169689Skan}
776169689Skan
777169689Skan/* Return true if all the BBs of the loop are empty except the
778169689Skan   loop header.  */
779169689Skanstatic bool
780169689Skanloop_single_full_bb_p (struct loop *loop)
781169689Skan{
782169689Skan  unsigned i;
783169689Skan  basic_block *bbs = get_loop_body (loop);
784169689Skan
785169689Skan  for (i = 0; i < loop->num_nodes ; i++)
786169689Skan    {
787169689Skan      rtx head, tail;
788169689Skan      bool empty_bb = true;
789169689Skan
790169689Skan      if (bbs[i] == loop->header)
791169689Skan        continue;
792169689Skan
793169689Skan      /* Make sure that basic blocks other than the header
794169689Skan         have only notes labels or jumps.  */
795169689Skan      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
796169689Skan      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
797169689Skan        {
798169689Skan          if (NOTE_P (head) || LABEL_P (head)
799169689Skan 	      || (INSN_P (head) && JUMP_P (head)))
800169689Skan 	    continue;
801169689Skan 	  empty_bb = false;
802169689Skan 	  break;
803169689Skan        }
804169689Skan
805169689Skan      if (! empty_bb)
806169689Skan        {
807169689Skan          free (bbs);
808169689Skan          return false;
809169689Skan        }
810169689Skan    }
811169689Skan  free (bbs);
812169689Skan  return true;
813169689Skan}
814169689Skan
815169689Skan/* A simple loop from SMS point of view; it is a loop that is composed of
816169689Skan   either a single basic block or two BBs - a header and a latch.  */
817169689Skan#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
818169689Skan				  && (EDGE_COUNT (loop->latch->preds) == 1) \
819169689Skan                                  && (EDGE_COUNT (loop->latch->succs) == 1))
820169689Skan
821169689Skan/* Return true if the loop is in its canonical form and false if not.
822169689Skan   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
823169689Skanstatic bool
824169689Skanloop_canon_p (struct loop *loop)
825169689Skan{
826169689Skan
827169689Skan  if (loop->inner || ! loop->outer)
828169689Skan    return false;
829169689Skan
830169689Skan  if (!loop->single_exit)
831169689Skan    {
832169689Skan      if (dump_file)
833169689Skan	{
834169689Skan	  rtx line_note = find_line_note (BB_END (loop->header));
835169689Skan
836169689Skan	  fprintf (dump_file, "SMS loop many exits ");
837169689Skan	  if (line_note)
838169689Skan	    {
839169689Skan	      expanded_location xloc;
840169689Skan	      NOTE_EXPANDED_LOCATION (xloc, line_note);
841169689Skan	      fprintf (dump_file, " %s %d (file, line)\n",
842169689Skan		       xloc.file, xloc.line);
843169689Skan	    }
844169689Skan	}
845169689Skan      return false;
846169689Skan    }
847169689Skan
848169689Skan  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
849169689Skan    {
850169689Skan      if (dump_file)
851169689Skan	{
852169689Skan	  rtx line_note = find_line_note (BB_END (loop->header));
853169689Skan
854169689Skan	  fprintf (dump_file, "SMS loop many BBs. ");
855169689Skan	  if (line_note)
856169689Skan	    {
857169689Skan	      expanded_location xloc;
858169689Skan  	      NOTE_EXPANDED_LOCATION (xloc, line_note);
859169689Skan	      fprintf (dump_file, " %s %d (file, line)\n",
860169689Skan		       xloc.file, xloc.line);
861169689Skan	    }
862169689Skan	}
863169689Skan      return false;
864169689Skan    }
865169689Skan
866169689Skan    return true;
867169689Skan}
868169689Skan
869169689Skan/* If there are more than one entry for the loop,
870169689Skan   make it one by splitting the first entry edge and
871169689Skan   redirecting the others to the new BB.  */
872169689Skanstatic void
873169689Skancanon_loop (struct loop *loop)
874169689Skan{
875169689Skan  edge e;
876169689Skan  edge_iterator i;
877169689Skan
878169689Skan  /* Avoid annoying special cases of edges going to exit
879169689Skan     block.  */
880169689Skan  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
881169689Skan    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
882169689Skan      loop_split_edge_with (e, NULL_RTX);
883169689Skan
884169689Skan  if (loop->latch == loop->header
885169689Skan      || EDGE_COUNT (loop->latch->succs) > 1)
886169689Skan    {
887169689Skan      FOR_EACH_EDGE (e, i, loop->header->preds)
888169689Skan        if (e->src == loop->latch)
889169689Skan          break;
890169689Skan      loop_split_edge_with (e, NULL_RTX);
891169689Skan    }
892169689Skan}
893169689Skan
894169689Skan/* Main entry point, perform SMS scheduling on the loops of the function
895169689Skan   that consist of single basic blocks.  */
896169689Skanstatic void
897169689Skansms_schedule (void)
898169689Skan{
899169689Skan  static int passes = 0;
900169689Skan  rtx insn;
901169689Skan  ddg_ptr *g_arr, g;
902169689Skan  int * node_order;
903169689Skan  int maxii;
904169689Skan  unsigned i,num_loops;
905169689Skan  partial_schedule_ptr ps;
906169689Skan  struct df *df;
907169689Skan  struct loops *loops;
908169689Skan  basic_block bb = NULL;
909169689Skan  /* vars to the versioning only if needed*/
910169689Skan  struct loop * nloop;
911169689Skan  basic_block condition_bb = NULL;
912169689Skan  edge latch_edge;
913169689Skan  gcov_type trip_count = 0;
914169689Skan
915169689Skan  loops = loop_optimizer_init (LOOPS_HAVE_PREHEADERS
916169689Skan			       | LOOPS_HAVE_MARKED_SINGLE_EXITS);
917169689Skan  if (!loops)
918169689Skan    return;  /* There is no loops to schedule.  */
919169689Skan
920169689Skan  /* Initialize issue_rate.  */
921169689Skan  if (targetm.sched.issue_rate)
922169689Skan    {
923169689Skan      int temp = reload_completed;
924169689Skan
925169689Skan      reload_completed = 1;
926169689Skan      issue_rate = targetm.sched.issue_rate ();
927169689Skan      reload_completed = temp;
928169689Skan    }
929169689Skan  else
930169689Skan    issue_rate = 1;
931169689Skan
932169689Skan  /* Initialize the scheduler.  */
933169689Skan  current_sched_info = &sms_sched_info;
934169689Skan  sched_init ();
935169689Skan
936169689Skan  /* Init Data Flow analysis, to be used in interloop dep calculation.  */
937169689Skan  df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
938169689Skan  df_rd_add_problem (df, 0);
939169689Skan  df_ru_add_problem (df, 0);
940169689Skan  df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
941169689Skan  df_analyze (df);
942169689Skan
943169689Skan  if (dump_file)
944169689Skan    df_dump (df, dump_file);
945169689Skan
946169689Skan  /* Allocate memory to hold the DDG array one entry for each loop.
947169689Skan     We use loop->num as index into this array.  */
948169689Skan  g_arr = XCNEWVEC (ddg_ptr, loops->num);
949169689Skan
950169689Skan
951169689Skan  /* Build DDGs for all the relevant loops and hold them in G_ARR
952169689Skan     indexed by the loop index.  */
953169689Skan  for (i = 0; i < loops->num; i++)
954169689Skan    {
955169689Skan      rtx head, tail;
956169689Skan      rtx count_reg;
957169689Skan      struct loop *loop = loops->parray[i];
958169689Skan
959169689Skan      /* For debugging.  */
960169689Skan      if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
961169689Skan        {
962169689Skan          if (dump_file)
963169689Skan            fprintf (dump_file, "SMS reached MAX_PASSES... \n");
964169689Skan
965169689Skan          break;
966169689Skan        }
967169689Skan
968169689Skan      if (! loop_canon_p (loop))
969169689Skan        continue;
970169689Skan
971169689Skan      if (! loop_single_full_bb_p (loop))
972169689Skan	continue;
973169689Skan
974169689Skan      bb = loop->header;
975169689Skan
976169689Skan      get_ebb_head_tail (bb, bb, &head, &tail);
977169689Skan      latch_edge = loop_latch_edge (loop);
978169689Skan      gcc_assert (loop->single_exit);
979169689Skan      if (loop->single_exit->count)
980169689Skan	trip_count = latch_edge->count / loop->single_exit->count;
981169689Skan
982169689Skan      /* Perfrom SMS only on loops that their average count is above threshold.  */
983169689Skan
984169689Skan      if ( latch_edge->count
985169689Skan          && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
986169689Skan	{
987169689Skan	  if (dump_file)
988169689Skan	    {
989169689Skan	      rtx line_note = find_line_note (tail);
990169689Skan
991169689Skan	      if (line_note)
992169689Skan		{
993169689Skan		  expanded_location xloc;
994169689Skan		  NOTE_EXPANDED_LOCATION (xloc, line_note);
995169689Skan		  fprintf (dump_file, "SMS bb %s %d (file, line)\n",
996169689Skan			   xloc.file, xloc.line);
997169689Skan		}
998169689Skan	      fprintf (dump_file, "SMS single-bb-loop\n");
999169689Skan	      if (profile_info && flag_branch_probabilities)
1000169689Skan	    	{
1001169689Skan	      	  fprintf (dump_file, "SMS loop-count ");
1002169689Skan	      	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1003169689Skan	             	   (HOST_WIDEST_INT) bb->count);
1004169689Skan	      	  fprintf (dump_file, "\n");
1005169689Skan                  fprintf (dump_file, "SMS trip-count ");
1006169689Skan                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1007169689Skan                           (HOST_WIDEST_INT) trip_count);
1008169689Skan                  fprintf (dump_file, "\n");
1009169689Skan	      	  fprintf (dump_file, "SMS profile-sum-max ");
1010169689Skan	      	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1011169689Skan	          	   (HOST_WIDEST_INT) profile_info->sum_max);
1012169689Skan	      	  fprintf (dump_file, "\n");
1013169689Skan	    	}
1014169689Skan	    }
1015169689Skan          continue;
1016169689Skan        }
1017169689Skan
1018169689Skan      /* Make sure this is a doloop.  */
1019169689Skan      if ( !(count_reg = doloop_register_get (tail)))
1020169689Skan	continue;
1021169689Skan
1022169689Skan      /* Don't handle BBs with calls or barriers, or !single_set insns.  */
1023169689Skan      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1024169689Skan	if (CALL_P (insn)
1025169689Skan	    || BARRIER_P (insn)
1026169689Skan	    || (INSN_P (insn) && !JUMP_P (insn)
1027169689Skan		&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1028169689Skan	  break;
1029169689Skan
1030169689Skan      if (insn != NEXT_INSN (tail))
1031169689Skan	{
1032169689Skan	  if (dump_file)
1033169689Skan	    {
1034169689Skan	      if (CALL_P (insn))
1035169689Skan		fprintf (dump_file, "SMS loop-with-call\n");
1036169689Skan	      else if (BARRIER_P (insn))
1037169689Skan		fprintf (dump_file, "SMS loop-with-barrier\n");
1038169689Skan	      else
1039169689Skan		fprintf (dump_file, "SMS loop-with-not-single-set\n");
1040169689Skan	      print_rtl_single (dump_file, insn);
1041169689Skan	    }
1042169689Skan
1043169689Skan	  continue;
1044169689Skan	}
1045169689Skan
1046169689Skan      if (! (g = create_ddg (bb, df, 0)))
1047169689Skan        {
1048169689Skan          if (dump_file)
1049169689Skan	    fprintf (dump_file, "SMS doloop\n");
1050169689Skan	  continue;
1051169689Skan        }
1052169689Skan
1053169689Skan      g_arr[i] = g;
1054169689Skan    }
1055169689Skan
1056169689Skan  /* Release Data Flow analysis data structures.  */
1057169689Skan  df_finish (df);
1058169689Skan  df = NULL;
1059169689Skan
1060169689Skan  /* We don't want to perform SMS on new loops - created by versioning.  */
1061169689Skan  num_loops = loops->num;
1062169689Skan  /* Go over the built DDGs and perfrom SMS for each one of them.  */
1063169689Skan  for (i = 0; i < num_loops; i++)
1064169689Skan    {
1065169689Skan      rtx head, tail;
1066169689Skan      rtx count_reg, count_init;
1067169689Skan      int mii, rec_mii;
1068169689Skan      unsigned stage_count = 0;
1069169689Skan      HOST_WIDEST_INT loop_count = 0;
1070169689Skan      struct loop *loop = loops->parray[i];
1071169689Skan
1072169689Skan      if (! (g = g_arr[i]))
1073169689Skan        continue;
1074169689Skan
1075169689Skan      if (dump_file)
1076169689Skan	print_ddg (dump_file, g);
1077169689Skan
1078169689Skan      get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1079169689Skan
1080169689Skan      latch_edge = loop_latch_edge (loop);
1081169689Skan      gcc_assert (loop->single_exit);
1082169689Skan      if (loop->single_exit->count)
1083169689Skan	trip_count = latch_edge->count / loop->single_exit->count;
1084169689Skan
1085169689Skan      if (dump_file)
1086169689Skan	{
1087169689Skan	  rtx line_note = find_line_note (tail);
1088169689Skan
1089169689Skan	  if (line_note)
1090169689Skan	    {
1091169689Skan	      expanded_location xloc;
1092169689Skan	      NOTE_EXPANDED_LOCATION (xloc, line_note);
1093169689Skan	      fprintf (dump_file, "SMS bb %s %d (file, line)\n",
1094169689Skan		       xloc.file, xloc.line);
1095169689Skan	    }
1096169689Skan	  fprintf (dump_file, "SMS single-bb-loop\n");
1097169689Skan	  if (profile_info && flag_branch_probabilities)
1098169689Skan	    {
1099169689Skan	      fprintf (dump_file, "SMS loop-count ");
1100169689Skan	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1101169689Skan	               (HOST_WIDEST_INT) bb->count);
1102169689Skan	      fprintf (dump_file, "\n");
1103169689Skan	      fprintf (dump_file, "SMS profile-sum-max ");
1104169689Skan	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1105169689Skan	               (HOST_WIDEST_INT) profile_info->sum_max);
1106169689Skan	      fprintf (dump_file, "\n");
1107169689Skan	    }
1108169689Skan	  fprintf (dump_file, "SMS doloop\n");
1109169689Skan	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1110169689Skan          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1111169689Skan          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1112169689Skan	}
1113169689Skan
1114169689Skan
1115169689Skan      /* In case of th loop have doloop register it gets special
1116169689Skan	 handling.  */
1117169689Skan      count_init = NULL_RTX;
1118169689Skan      if ((count_reg = doloop_register_get (tail)))
1119169689Skan	{
1120169689Skan	  basic_block pre_header;
1121169689Skan
1122169689Skan	  pre_header = loop_preheader_edge (loop)->src;
1123169689Skan	  count_init = const_iteration_count (count_reg, pre_header,
1124169689Skan					      &loop_count);
1125169689Skan	}
1126169689Skan      gcc_assert (count_reg);
1127169689Skan
1128169689Skan      if (dump_file && count_init)
1129169689Skan        {
1130169689Skan          fprintf (dump_file, "SMS const-doloop ");
1131169689Skan          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1132169689Skan		     loop_count);
1133169689Skan          fprintf (dump_file, "\n");
1134169689Skan        }
1135169689Skan
1136169689Skan      node_order = XNEWVEC (int, g->num_nodes);
1137169689Skan
1138169689Skan      mii = 1; /* Need to pass some estimate of mii.  */
1139169689Skan      rec_mii = sms_order_nodes (g, mii, node_order);
1140169689Skan      mii = MAX (res_MII (g), rec_mii);
1141169689Skan      maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1142169689Skan
1143169689Skan      if (dump_file)
1144169689Skan	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1145169689Skan		 rec_mii, mii, maxii);
1146169689Skan
1147169689Skan      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1148169689Skan	 ASAP.  */
1149169689Skan      set_node_sched_params (g);
1150169689Skan
1151169689Skan      ps = sms_schedule_by_order (g, mii, maxii, node_order);
1152169689Skan
1153169689Skan      if (ps)
1154169689Skan	stage_count = PS_STAGE_COUNT (ps);
1155169689Skan
1156169689Skan      /* Stage count of 1 means that there is no interleaving between
1157169689Skan         iterations, let the scheduling passes do the job.  */
1158169689Skan      if (stage_count < 1
1159169689Skan	  || (count_init && (loop_count <= stage_count))
1160169689Skan	  || (flag_branch_probabilities && (trip_count <= stage_count)))
1161169689Skan	{
1162169689Skan	  if (dump_file)
1163169689Skan	    {
1164169689Skan	      fprintf (dump_file, "SMS failed... \n");
1165169689Skan	      fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1166169689Skan	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1167169689Skan	      fprintf (dump_file, ", trip-count=");
1168169689Skan	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1169169689Skan	      fprintf (dump_file, ")\n");
1170169689Skan	    }
1171169689Skan	  continue;
1172169689Skan	}
1173169689Skan      else
1174169689Skan	{
1175169689Skan	  int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1176169689Skan	  int new_cycles;
1177169689Skan	  struct undo_replace_buff_elem *reg_move_replaces;
1178169689Skan
1179169689Skan	  if (dump_file)
1180169689Skan	    {
1181169689Skan	      fprintf (dump_file,
1182169689Skan		       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1183169689Skan		       stage_count);
1184169689Skan	      print_partial_schedule (ps, dump_file);
1185169689Skan	      fprintf (dump_file,
1186169689Skan		       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1187169689Skan		       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1188169689Skan	    }
1189169689Skan
1190169689Skan	  /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
1191169689Skan	     the closing_branch was scheduled and should appear in the last (ii-1)
1192169689Skan	     row.  Otherwise, we are free to schedule the branch, and we let nodes
1193169689Skan	     that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1194169689Skan	     row; this should reduce stage_count to minimum.  */
1195169689Skan	  normalize_sched_times (ps);
1196169689Skan	  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1197169689Skan	  set_columns_for_ps (ps);
1198169689Skan
1199169689Skan	  /* Generate the kernel just to be able to measure its cycles.  */
1200169689Skan	  permute_partial_schedule (ps, g->closing_branch->first_note);
1201169689Skan	  reg_move_replaces = generate_reg_moves (ps);
1202169689Skan
1203169689Skan	  /* Get the number of cycles the new kernel expect to execute in.  */
1204169689Skan	  new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1205169689Skan
1206169689Skan	  /* Get back to the original loop so we can do loop versioning.  */
1207169689Skan	  undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1208169689Skan	  if (reg_move_replaces)
1209169689Skan	    undo_generate_reg_moves (ps, reg_move_replaces);
1210169689Skan
1211169689Skan	  if ( new_cycles >= orig_cycles)
1212169689Skan	    {
1213169689Skan	      /* SMS is not profitable so undo the permutation and reg move generation
1214169689Skan	         and return the kernel to its original state.  */
1215169689Skan	      if (dump_file)
1216169689Skan		fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1217169689Skan
1218169689Skan	    }
1219169689Skan	  else
1220169689Skan	    {
1221169689Skan	      canon_loop (loop);
1222169689Skan
1223169689Skan              /* case the BCT count is not known , Do loop-versioning */
1224169689Skan	      if (count_reg && ! count_init)
1225169689Skan		{
1226169689Skan		  rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1227169689Skan						 GEN_INT(stage_count));
1228169689Skan
1229169689Skan		  nloop = loop_version (loops, loop, comp_rtx, &condition_bb,
1230169689Skan					true);
1231169689Skan		}
1232169689Skan
1233169689Skan	      /* Set new iteration count of loop kernel.  */
1234169689Skan              if (count_reg && count_init)
1235169689Skan		SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1236169689Skan							     - stage_count + 1);
1237169689Skan
1238169689Skan	      /* Now apply the scheduled kernel to the RTL of the loop.  */
1239169689Skan	      permute_partial_schedule (ps, g->closing_branch->first_note);
1240169689Skan
1241169689Skan              /* Mark this loop as software pipelined so the later
1242169689Skan	      scheduling passes doesn't touch it.  */
1243169689Skan	      if (! flag_resched_modulo_sched)
1244169689Skan		g->bb->flags |= BB_DISABLE_SCHEDULE;
1245169689Skan	      /* The life-info is not valid any more.  */
1246169689Skan	      g->bb->flags |= BB_DIRTY;
1247169689Skan
1248169689Skan	      reg_move_replaces = generate_reg_moves (ps);
1249169689Skan	      if (dump_file)
1250169689Skan		print_node_sched_params (dump_file, g->num_nodes);
1251169689Skan	      /* Generate prolog and epilog.  */
1252169689Skan	      if (count_reg && !count_init)
1253169689Skan		generate_prolog_epilog (ps, loop, count_reg);
1254169689Skan	      else
1255169689Skan	 	generate_prolog_epilog (ps, loop, NULL_RTX);
1256169689Skan	    }
1257169689Skan	  free_undo_replace_buff (reg_move_replaces);
1258169689Skan	}
1259169689Skan
1260169689Skan      free_partial_schedule (ps);
1261169689Skan      free (node_sched_params);
1262169689Skan      free (node_order);
1263169689Skan      free_ddg (g);
1264169689Skan    }
1265169689Skan
1266169689Skan  free (g_arr);
1267169689Skan
1268169689Skan  /* Release scheduler data, needed until now because of DFA.  */
1269169689Skan  sched_finish ();
1270169689Skan  loop_optimizer_finalize (loops);
1271169689Skan}
1272169689Skan
1273169689Skan/* The SMS scheduling algorithm itself
1274169689Skan   -----------------------------------
1275169689Skan   Input: 'O' an ordered list of insns of a loop.
1276169689Skan   Output: A scheduling of the loop - kernel, prolog, and epilogue.
1277169689Skan
1278169689Skan   'Q' is the empty Set
1279169689Skan   'PS' is the partial schedule; it holds the currently scheduled nodes with
1280169689Skan	their cycle/slot.
1281169689Skan   'PSP' previously scheduled predecessors.
1282169689Skan   'PSS' previously scheduled successors.
1283169689Skan   't(u)' the cycle where u is scheduled.
1284169689Skan   'l(u)' is the latency of u.
1285169689Skan   'd(v,u)' is the dependence distance from v to u.
1286169689Skan   'ASAP(u)' the earliest time at which u could be scheduled as computed in
1287169689Skan	     the node ordering phase.
1288169689Skan   'check_hardware_resources_conflicts(u, PS, c)'
1289169689Skan			     run a trace around cycle/slot through DFA model
1290169689Skan			     to check resource conflicts involving instruction u
1291169689Skan			     at cycle c given the partial schedule PS.
1292169689Skan   'add_to_partial_schedule_at_time(u, PS, c)'
1293169689Skan			     Add the node/instruction u to the partial schedule
1294169689Skan			     PS at time c.
1295169689Skan   'calculate_register_pressure(PS)'
1296169689Skan			     Given a schedule of instructions, calculate the register
1297169689Skan			     pressure it implies.  One implementation could be the
1298169689Skan			     maximum number of overlapping live ranges.
1299169689Skan   'maxRP' The maximum allowed register pressure, it is usually derived from the number
1300169689Skan	   registers available in the hardware.
1301169689Skan
1302169689Skan   1. II = MII.
1303169689Skan   2. PS = empty list
1304169689Skan   3. for each node u in O in pre-computed order
1305169689Skan   4.   if (PSP(u) != Q && PSS(u) == Q) then
1306169689Skan   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1307169689Skan   6.     start = Early_start; end = Early_start + II - 1; step = 1
1308169689Skan   11.  else if (PSP(u) == Q && PSS(u) != Q) then
1309169689Skan   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1310169689Skan   13.     start = Late_start; end = Late_start - II + 1; step = -1
1311169689Skan   14.  else if (PSP(u) != Q && PSS(u) != Q) then
1312169689Skan   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1313169689Skan   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1314169689Skan   17.     start = Early_start;
1315169689Skan   18.     end = min(Early_start + II - 1 , Late_start);
1316169689Skan   19.     step = 1
1317169689Skan   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1318169689Skan   21.	  start = ASAP(u); end = start + II - 1; step = 1
1319169689Skan   22.  endif
1320169689Skan
1321169689Skan   23.  success = false
1322169689Skan   24.  for (c = start ; c != end ; c += step)
1323169689Skan   25.     if check_hardware_resources_conflicts(u, PS, c) then
1324169689Skan   26.       add_to_partial_schedule_at_time(u, PS, c)
1325169689Skan   27.       success = true
1326169689Skan   28.       break
1327169689Skan   29.     endif
1328169689Skan   30.  endfor
1329169689Skan   31.  if (success == false) then
1330169689Skan   32.    II = II + 1
1331169689Skan   33.    if (II > maxII) then
1332169689Skan   34.       finish - failed to schedule
1333169689Skan   35.	 endif
1334169689Skan   36.    goto 2.
1335169689Skan   37.  endif
1336169689Skan   38. endfor
1337169689Skan   39. if (calculate_register_pressure(PS) > maxRP) then
1338169689Skan   40.    goto 32.
1339169689Skan   41. endif
1340169689Skan   42. compute epilogue & prologue
1341169689Skan   43. finish - succeeded to schedule
1342169689Skan*/
1343169689Skan
1344169689Skan/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1345169689Skan   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1346169689Skan   set to 0 to save compile time.  */
1347169689Skan#define DFA_HISTORY SMS_DFA_HISTORY
1348169689Skan
1349169689Skan/* Given the partial schedule PS, this function calculates and returns the
1350169689Skan   cycles in which we can schedule the node with the given index I.
1351169689Skan   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1352169689Skan   noticed that there are several cases in which we fail    to SMS the loop
1353169689Skan   because the sched window of a node is empty    due to tight data-deps. In
1354169689Skan   such cases we want to unschedule    some of the predecessors/successors
1355169689Skan   until we get non-empty    scheduling window.  It returns -1 if the
1356169689Skan   scheduling window is empty and zero otherwise.  */
1357169689Skan
1358169689Skanstatic int
1359169689Skanget_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1360169689Skan		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1361169689Skan{
1362169689Skan  int start, step, end;
1363169689Skan  ddg_edge_ptr e;
1364169689Skan  int u = nodes_order [i];
1365169689Skan  ddg_node_ptr u_node = &ps->g->nodes[u];
1366169689Skan  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1367169689Skan  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1368169689Skan  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1369169689Skan  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1370169689Skan  int psp_not_empty;
1371169689Skan  int pss_not_empty;
1372169689Skan
1373169689Skan  /* 1. compute sched window for u (start, end, step).  */
1374169689Skan  sbitmap_zero (psp);
1375169689Skan  sbitmap_zero (pss);
1376169689Skan  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1377169689Skan  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1378169689Skan
1379169689Skan  if (psp_not_empty && !pss_not_empty)
1380169689Skan    {
1381169689Skan      int early_start = INT_MIN;
1382169689Skan
1383169689Skan      end = INT_MAX;
1384169689Skan      for (e = u_node->in; e != 0; e = e->next_in)
1385169689Skan	{
1386169689Skan	  ddg_node_ptr v_node = e->src;
1387169689Skan	  if (TEST_BIT (sched_nodes, v_node->cuid))
1388169689Skan	    {
1389169689Skan	      int node_st = SCHED_TIME (v_node)
1390169689Skan	      		    + e->latency - (e->distance * ii);
1391169689Skan
1392169689Skan	      early_start = MAX (early_start, node_st);
1393169689Skan
1394169689Skan	      if (e->data_type == MEM_DEP)
1395169689Skan		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1396169689Skan	    }
1397169689Skan	}
1398169689Skan      start = early_start;
1399169689Skan      end = MIN (end, early_start + ii);
1400169689Skan      step = 1;
1401169689Skan    }
1402169689Skan
1403169689Skan  else if (!psp_not_empty && pss_not_empty)
1404169689Skan    {
1405169689Skan      int late_start = INT_MAX;
1406169689Skan
1407169689Skan      end = INT_MIN;
1408169689Skan      for (e = u_node->out; e != 0; e = e->next_out)
1409169689Skan	{
1410169689Skan	  ddg_node_ptr v_node = e->dest;
1411169689Skan	  if (TEST_BIT (sched_nodes, v_node->cuid))
1412169689Skan	    {
1413169689Skan	      late_start = MIN (late_start,
1414169689Skan				SCHED_TIME (v_node) - e->latency
1415169689Skan				+ (e->distance * ii));
1416169689Skan	      if (e->data_type == MEM_DEP)
1417169689Skan		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1418169689Skan	    }
1419169689Skan	}
1420169689Skan      start = late_start;
1421169689Skan      end = MAX (end, late_start - ii);
1422169689Skan      step = -1;
1423169689Skan    }
1424169689Skan
1425169689Skan  else if (psp_not_empty && pss_not_empty)
1426169689Skan    {
1427169689Skan      int early_start = INT_MIN;
1428169689Skan      int late_start = INT_MAX;
1429169689Skan
1430169689Skan      start = INT_MIN;
1431169689Skan      end = INT_MAX;
1432169689Skan      for (e = u_node->in; e != 0; e = e->next_in)
1433169689Skan	{
1434169689Skan	  ddg_node_ptr v_node = e->src;
1435169689Skan
1436169689Skan	  if (TEST_BIT (sched_nodes, v_node->cuid))
1437169689Skan	    {
1438169689Skan	      early_start = MAX (early_start,
1439169689Skan				 SCHED_TIME (v_node) + e->latency
1440169689Skan				 - (e->distance * ii));
1441169689Skan	      if (e->data_type == MEM_DEP)
1442169689Skan		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1443169689Skan	    }
1444169689Skan	}
1445169689Skan      for (e = u_node->out; e != 0; e = e->next_out)
1446169689Skan	{
1447169689Skan	  ddg_node_ptr v_node = e->dest;
1448169689Skan
1449169689Skan	  if (TEST_BIT (sched_nodes, v_node->cuid))
1450169689Skan	    {
1451169689Skan	      late_start = MIN (late_start,
1452169689Skan				SCHED_TIME (v_node) - e->latency
1453169689Skan				+ (e->distance * ii));
1454169689Skan	      if (e->data_type == MEM_DEP)
1455169689Skan		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1456169689Skan	    }
1457169689Skan	}
1458169689Skan      start = MAX (start, early_start);
1459169689Skan      end = MIN (end, MIN (early_start + ii, late_start + 1));
1460169689Skan      step = 1;
1461169689Skan    }
1462169689Skan  else /* psp is empty && pss is empty.  */
1463169689Skan    {
1464169689Skan      start = SCHED_ASAP (u_node);
1465169689Skan      end = start + ii;
1466169689Skan      step = 1;
1467169689Skan    }
1468169689Skan
1469169689Skan  *start_p = start;
1470169689Skan  *step_p = step;
1471169689Skan  *end_p = end;
1472169689Skan  sbitmap_free (psp);
1473169689Skan  sbitmap_free (pss);
1474169689Skan
1475169689Skan  if ((start >= end && step == 1) || (start <= end && step == -1))
1476169689Skan    return -1;
1477169689Skan  else
1478169689Skan    return 0;
1479169689Skan}
1480169689Skan
1481169689Skan/* This function implements the scheduling algorithm for SMS according to the
1482169689Skan   above algorithm.  */
1483169689Skanstatic partial_schedule_ptr
1484169689Skansms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1485169689Skan{
1486169689Skan  int ii = mii;
1487169689Skan  int i, c, success;
1488169689Skan  int try_again_with_larger_ii = true;
1489169689Skan  int num_nodes = g->num_nodes;
1490169689Skan  ddg_edge_ptr e;
1491169689Skan  int start, end, step; /* Place together into one struct?  */
1492169689Skan  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1493169689Skan  sbitmap must_precede = sbitmap_alloc (num_nodes);
1494169689Skan  sbitmap must_follow = sbitmap_alloc (num_nodes);
1495169689Skan  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1496169689Skan
1497169689Skan  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1498169689Skan
1499169689Skan  sbitmap_ones (tobe_scheduled);
1500169689Skan  sbitmap_zero (sched_nodes);
1501169689Skan
1502169689Skan  while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1503169689Skan	 || try_again_with_larger_ii ) && ii < maxii)
1504169689Skan    {
1505169689Skan      int j;
1506169689Skan      bool unscheduled_nodes = false;
1507169689Skan
1508169689Skan      if (dump_file)
1509169689Skan	fprintf(dump_file, "Starting with ii=%d\n", ii);
1510169689Skan      if (try_again_with_larger_ii)
1511169689Skan	{
1512169689Skan	  try_again_with_larger_ii = false;
1513169689Skan	  sbitmap_zero (sched_nodes);
1514169689Skan	}
1515169689Skan
1516169689Skan      for (i = 0; i < num_nodes; i++)
1517169689Skan	{
1518169689Skan	  int u = nodes_order[i];
1519169689Skan  	  ddg_node_ptr u_node = &ps->g->nodes[u];
1520169689Skan	  rtx insn = u_node->insn;
1521169689Skan
1522169689Skan	  if (!INSN_P (insn))
1523169689Skan	    {
1524169689Skan	      RESET_BIT (tobe_scheduled, u);
1525169689Skan	      continue;
1526169689Skan	    }
1527169689Skan
1528169689Skan	  if (JUMP_P (insn)) /* Closing branch handled later.  */
1529169689Skan	    {
1530169689Skan	      RESET_BIT (tobe_scheduled, u);
1531169689Skan	      continue;
1532169689Skan	    }
1533169689Skan
1534169689Skan	  if (TEST_BIT (sched_nodes, u))
1535169689Skan	    continue;
1536169689Skan
1537169689Skan	  /* Try to get non-empty scheduling window.  */
1538169689Skan	  j = i;
1539169689Skan	  while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
1540169689Skan		 && j > 0)
1541169689Skan	    {
1542169689Skan	      unscheduled_nodes = true;
1543169689Skan	      if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1544169689Skan		  || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1545169689Skan		{
1546169689Skan		  ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1547169689Skan		  RESET_BIT (sched_nodes, nodes_order [j - 1]);
1548169689Skan		}
1549169689Skan	      j--;
1550169689Skan	    }
1551169689Skan	  if (j < 0)
1552169689Skan	    {
1553169689Skan	      /* ??? Try backtracking instead of immediately ii++?  */
1554169689Skan	      ii++;
1555169689Skan	      try_again_with_larger_ii = true;
1556169689Skan	      reset_partial_schedule (ps, ii);
1557169689Skan	      break;
1558169689Skan	    }
1559169689Skan	  /* 2. Try scheduling u in window.  */
1560169689Skan	  if (dump_file)
1561169689Skan	    fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1562169689Skan		    u, start, end, step);
1563169689Skan
1564169689Skan          /* use must_follow & must_precede bitmaps to determine order
1565169689Skan	     of nodes within the cycle.  */
1566169689Skan          sbitmap_zero (must_precede);
1567169689Skan          sbitmap_zero (must_follow);
1568169689Skan      	  for (e = u_node->in; e != 0; e = e->next_in)
1569169689Skan            if (TEST_BIT (sched_nodes, e->src->cuid)
1570169689Skan	        && e->latency == (ii * e->distance)
1571169689Skan		&& start == SCHED_TIME (e->src))
1572169689Skan             SET_BIT (must_precede, e->src->cuid);
1573169689Skan
1574169689Skan	  for (e = u_node->out; e != 0; e = e->next_out)
1575169689Skan            if (TEST_BIT (sched_nodes, e->dest->cuid)
1576169689Skan	        && e->latency == (ii * e->distance)
1577169689Skan		&& end == SCHED_TIME (e->dest))
1578169689Skan             SET_BIT (must_follow, e->dest->cuid);
1579169689Skan
1580169689Skan	  success = 0;
1581169689Skan	  if ((step > 0 && start < end) ||  (step < 0 && start > end))
1582169689Skan	    for (c = start; c != end; c += step)
1583169689Skan	      {
1584169689Skan		ps_insn_ptr psi;
1585169689Skan
1586169689Skan		psi = ps_add_node_check_conflicts (ps, u_node, c,
1587169689Skan						   must_precede,
1588169689Skan						   must_follow);
1589169689Skan
1590169689Skan  		if (psi)
1591169689Skan		  {
1592169689Skan		    SCHED_TIME (u_node) = c;
1593169689Skan		    SET_BIT (sched_nodes, u);
1594169689Skan		    success = 1;
1595169689Skan		    if (dump_file)
1596169689Skan		      fprintf(dump_file, "Schedule in %d\n", c);
1597169689Skan		    break;
1598169689Skan		  }
1599169689Skan	      }
1600169689Skan	  if (!success)
1601169689Skan	    {
1602169689Skan	      /* ??? Try backtracking instead of immediately ii++?  */
1603169689Skan	      ii++;
1604169689Skan	      try_again_with_larger_ii = true;
1605169689Skan	      reset_partial_schedule (ps, ii);
1606169689Skan	      break;
1607169689Skan	    }
1608169689Skan	  if (unscheduled_nodes)
1609169689Skan	    break;
1610169689Skan
1611169689Skan	  /* ??? If (success), check register pressure estimates.  */
1612169689Skan	} /* Continue with next node.  */
1613169689Skan    } /* While try_again_with_larger_ii.  */
1614169689Skan
1615169689Skan  sbitmap_free (sched_nodes);
1616169689Skan  sbitmap_free (must_precede);
1617169689Skan  sbitmap_free (must_follow);
1618169689Skan  sbitmap_free (tobe_scheduled);
1619169689Skan
1620169689Skan  if (ii >= maxii)
1621169689Skan    {
1622169689Skan      free_partial_schedule (ps);
1623169689Skan      ps = NULL;
1624169689Skan    }
1625169689Skan  return ps;
1626169689Skan}
1627169689Skan
1628169689Skan
1629169689Skan/* This page implements the algorithm for ordering the nodes of a DDG
1630169689Skan   for modulo scheduling, activated through the
1631169689Skan   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
1632169689Skan
1633169689Skan#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1634169689Skan#define ASAP(x) (ORDER_PARAMS ((x))->asap)
1635169689Skan#define ALAP(x) (ORDER_PARAMS ((x))->alap)
1636169689Skan#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1637169689Skan#define MOB(x) (ALAP ((x)) - ASAP ((x)))
1638169689Skan#define DEPTH(x) (ASAP ((x)))
1639169689Skan
1640169689Skantypedef struct node_order_params * nopa;
1641169689Skan
1642169689Skanstatic void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1643169689Skanstatic int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1644169689Skanstatic nopa  calculate_order_params (ddg_ptr, int mii);
1645169689Skanstatic int find_max_asap (ddg_ptr, sbitmap);
1646169689Skanstatic int find_max_hv_min_mob (ddg_ptr, sbitmap);
1647169689Skanstatic int find_max_dv_min_mob (ddg_ptr, sbitmap);
1648169689Skan
1649169689Skanenum sms_direction {BOTTOMUP, TOPDOWN};
1650169689Skan
1651169689Skanstruct node_order_params
1652169689Skan{
1653169689Skan  int asap;
1654169689Skan  int alap;
1655169689Skan  int height;
1656169689Skan};
1657169689Skan
1658169689Skan/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
1659169689Skanstatic void
1660169689Skancheck_nodes_order (int *node_order, int num_nodes)
1661169689Skan{
1662169689Skan  int i;
1663169689Skan  sbitmap tmp = sbitmap_alloc (num_nodes);
1664169689Skan
1665169689Skan  sbitmap_zero (tmp);
1666169689Skan
1667169689Skan  for (i = 0; i < num_nodes; i++)
1668169689Skan    {
1669169689Skan      int u = node_order[i];
1670169689Skan
1671169689Skan      gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1672169689Skan
1673169689Skan      SET_BIT (tmp, u);
1674169689Skan    }
1675169689Skan
1676169689Skan  sbitmap_free (tmp);
1677169689Skan}
1678169689Skan
1679169689Skan/* Order the nodes of G for scheduling and pass the result in
1680169689Skan   NODE_ORDER.  Also set aux.count of each node to ASAP.
1681169689Skan   Return the recMII for the given DDG.  */
1682169689Skanstatic int
1683169689Skansms_order_nodes (ddg_ptr g, int mii, int * node_order)
1684169689Skan{
1685169689Skan  int i;
1686169689Skan  int rec_mii = 0;
1687169689Skan  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1688169689Skan
1689169689Skan  nopa nops = calculate_order_params (g, mii);
1690169689Skan
1691169689Skan  order_nodes_of_sccs (sccs, node_order);
1692169689Skan
1693169689Skan  if (sccs->num_sccs > 0)
1694169689Skan    /* First SCC has the largest recurrence_length.  */
1695169689Skan    rec_mii = sccs->sccs[0]->recurrence_length;
1696169689Skan
1697169689Skan  /* Save ASAP before destroying node_order_params.  */
1698169689Skan  for (i = 0; i < g->num_nodes; i++)
1699169689Skan    {
1700169689Skan      ddg_node_ptr v = &g->nodes[i];
1701169689Skan      v->aux.count = ASAP (v);
1702169689Skan    }
1703169689Skan
1704169689Skan  free (nops);
1705169689Skan  free_ddg_all_sccs (sccs);
1706169689Skan  check_nodes_order (node_order, g->num_nodes);
1707169689Skan
1708169689Skan  return rec_mii;
1709169689Skan}
1710169689Skan
1711169689Skanstatic void
1712169689Skanorder_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1713169689Skan{
1714169689Skan  int i, pos = 0;
1715169689Skan  ddg_ptr g = all_sccs->ddg;
1716169689Skan  int num_nodes = g->num_nodes;
1717169689Skan  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1718169689Skan  sbitmap on_path = sbitmap_alloc (num_nodes);
1719169689Skan  sbitmap tmp = sbitmap_alloc (num_nodes);
1720169689Skan  sbitmap ones = sbitmap_alloc (num_nodes);
1721169689Skan
1722169689Skan  sbitmap_zero (prev_sccs);
1723169689Skan  sbitmap_ones (ones);
1724169689Skan
1725169689Skan  /* Perfrom the node ordering starting from the SCC with the highest recMII.
1726169689Skan     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
1727169689Skan  for (i = 0; i < all_sccs->num_sccs; i++)
1728169689Skan    {
1729169689Skan      ddg_scc_ptr scc = all_sccs->sccs[i];
1730169689Skan
1731169689Skan      /* Add nodes on paths from previous SCCs to the current SCC.  */
1732169689Skan      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1733169689Skan      sbitmap_a_or_b (tmp, scc->nodes, on_path);
1734169689Skan
1735169689Skan      /* Add nodes on paths from the current SCC to previous SCCs.  */
1736169689Skan      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1737169689Skan      sbitmap_a_or_b (tmp, tmp, on_path);
1738169689Skan
1739169689Skan      /* Remove nodes of previous SCCs from current extended SCC.  */
1740169689Skan      sbitmap_difference (tmp, tmp, prev_sccs);
1741169689Skan
1742169689Skan      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1743169689Skan      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
1744169689Skan    }
1745169689Skan
1746169689Skan  /* Handle the remaining nodes that do not belong to any scc.  Each call
1747169689Skan     to order_nodes_in_scc handles a single connected component.  */
1748169689Skan  while (pos < g->num_nodes)
1749169689Skan    {
1750169689Skan      sbitmap_difference (tmp, ones, prev_sccs);
1751169689Skan      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1752169689Skan    }
1753169689Skan  sbitmap_free (prev_sccs);
1754169689Skan  sbitmap_free (on_path);
1755169689Skan  sbitmap_free (tmp);
1756169689Skan  sbitmap_free (ones);
1757169689Skan}
1758169689Skan
1759169689Skan/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
1760169689Skanstatic struct node_order_params *
1761169689Skancalculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1762169689Skan{
1763169689Skan  int u;
1764169689Skan  int max_asap;
1765169689Skan  int num_nodes = g->num_nodes;
1766169689Skan  ddg_edge_ptr e;
1767169689Skan  /* Allocate a place to hold ordering params for each node in the DDG.  */
1768169689Skan  nopa node_order_params_arr;
1769169689Skan
1770169689Skan  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
1771169689Skan  node_order_params_arr = (nopa) xcalloc (num_nodes,
1772169689Skan					  sizeof (struct node_order_params));
1773169689Skan
1774169689Skan  /* Set the aux pointer of each node to point to its order_params structure.  */
1775169689Skan  for (u = 0; u < num_nodes; u++)
1776169689Skan    g->nodes[u].aux.info = &node_order_params_arr[u];
1777169689Skan
1778169689Skan  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1779169689Skan     calculate ASAP, ALAP, mobility, distance, and height for each node
1780169689Skan     in the dependence (direct acyclic) graph.  */
1781169689Skan
1782169689Skan  /* We assume that the nodes in the array are in topological order.  */
1783169689Skan
1784169689Skan  max_asap = 0;
1785169689Skan  for (u = 0; u < num_nodes; u++)
1786169689Skan    {
1787169689Skan      ddg_node_ptr u_node = &g->nodes[u];
1788169689Skan
1789169689Skan      ASAP (u_node) = 0;
1790169689Skan      for (e = u_node->in; e; e = e->next_in)
1791169689Skan	if (e->distance == 0)
1792169689Skan	  ASAP (u_node) = MAX (ASAP (u_node),
1793169689Skan			       ASAP (e->src) + e->latency);
1794169689Skan      max_asap = MAX (max_asap, ASAP (u_node));
1795169689Skan    }
1796169689Skan
1797169689Skan  for (u = num_nodes - 1; u > -1; u--)
1798169689Skan    {
1799169689Skan      ddg_node_ptr u_node = &g->nodes[u];
1800169689Skan
1801169689Skan      ALAP (u_node) = max_asap;
1802169689Skan      HEIGHT (u_node) = 0;
1803169689Skan      for (e = u_node->out; e; e = e->next_out)
1804169689Skan	if (e->distance == 0)
1805169689Skan	  {
1806169689Skan	    ALAP (u_node) = MIN (ALAP (u_node),
1807169689Skan				 ALAP (e->dest) - e->latency);
1808169689Skan	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
1809169689Skan				   HEIGHT (e->dest) + e->latency);
1810169689Skan	  }
1811169689Skan    }
1812169689Skan
1813169689Skan  return node_order_params_arr;
1814169689Skan}
1815169689Skan
1816169689Skanstatic int
1817169689Skanfind_max_asap (ddg_ptr g, sbitmap nodes)
1818169689Skan{
1819169689Skan  unsigned int u = 0;
1820169689Skan  int max_asap = -1;
1821169689Skan  int result = -1;
1822169689Skan  sbitmap_iterator sbi;
1823169689Skan
1824169689Skan  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1825169689Skan    {
1826169689Skan      ddg_node_ptr u_node = &g->nodes[u];
1827169689Skan
1828169689Skan      if (max_asap < ASAP (u_node))
1829169689Skan	{
1830169689Skan	  max_asap = ASAP (u_node);
1831169689Skan	  result = u;
1832169689Skan	}
1833169689Skan    }
1834169689Skan  return result;
1835169689Skan}
1836169689Skan
1837169689Skanstatic int
1838169689Skanfind_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1839169689Skan{
1840169689Skan  unsigned int u = 0;
1841169689Skan  int max_hv = -1;
1842169689Skan  int min_mob = INT_MAX;
1843169689Skan  int result = -1;
1844169689Skan  sbitmap_iterator sbi;
1845169689Skan
1846169689Skan  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1847169689Skan    {
1848169689Skan      ddg_node_ptr u_node = &g->nodes[u];
1849169689Skan
1850169689Skan      if (max_hv < HEIGHT (u_node))
1851169689Skan	{
1852169689Skan	  max_hv = HEIGHT (u_node);
1853169689Skan	  min_mob = MOB (u_node);
1854169689Skan	  result = u;
1855169689Skan	}
1856169689Skan      else if ((max_hv == HEIGHT (u_node))
1857169689Skan	       && (min_mob > MOB (u_node)))
1858169689Skan	{
1859169689Skan	  min_mob = MOB (u_node);
1860169689Skan	  result = u;
1861169689Skan	}
1862169689Skan    }
1863169689Skan  return result;
1864169689Skan}
1865169689Skan
1866169689Skanstatic int
1867169689Skanfind_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1868169689Skan{
1869169689Skan  unsigned int u = 0;
1870169689Skan  int max_dv = -1;
1871169689Skan  int min_mob = INT_MAX;
1872169689Skan  int result = -1;
1873169689Skan  sbitmap_iterator sbi;
1874169689Skan
1875169689Skan  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1876169689Skan    {
1877169689Skan      ddg_node_ptr u_node = &g->nodes[u];
1878169689Skan
1879169689Skan      if (max_dv < DEPTH (u_node))
1880169689Skan	{
1881169689Skan	  max_dv = DEPTH (u_node);
1882169689Skan	  min_mob = MOB (u_node);
1883169689Skan	  result = u;
1884169689Skan	}
1885169689Skan      else if ((max_dv == DEPTH (u_node))
1886169689Skan	       && (min_mob > MOB (u_node)))
1887169689Skan	{
1888169689Skan	  min_mob = MOB (u_node);
1889169689Skan	  result = u;
1890169689Skan	}
1891169689Skan    }
1892169689Skan  return result;
1893169689Skan}
1894169689Skan
1895169689Skan/* Places the nodes of SCC into the NODE_ORDER array starting
1896169689Skan   at position POS, according to the SMS ordering algorithm.
1897169689Skan   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1898169689Skan   the NODE_ORDER array, starting from position zero.  */
1899169689Skanstatic int
1900169689Skanorder_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1901169689Skan		    int * node_order, int pos)
1902169689Skan{
1903169689Skan  enum sms_direction dir;
1904169689Skan  int num_nodes = g->num_nodes;
1905169689Skan  sbitmap workset = sbitmap_alloc (num_nodes);
1906169689Skan  sbitmap tmp = sbitmap_alloc (num_nodes);
1907169689Skan  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1908169689Skan  sbitmap predecessors = sbitmap_alloc (num_nodes);
1909169689Skan  sbitmap successors = sbitmap_alloc (num_nodes);
1910169689Skan
1911169689Skan  sbitmap_zero (predecessors);
1912169689Skan  find_predecessors (predecessors, g, nodes_ordered);
1913169689Skan
1914169689Skan  sbitmap_zero (successors);
1915169689Skan  find_successors (successors, g, nodes_ordered);
1916169689Skan
1917169689Skan  sbitmap_zero (tmp);
1918169689Skan  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1919169689Skan    {
1920169689Skan      sbitmap_copy (workset, tmp);
1921169689Skan      dir = BOTTOMUP;
1922169689Skan    }
1923169689Skan  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1924169689Skan    {
1925169689Skan      sbitmap_copy (workset, tmp);
1926169689Skan      dir = TOPDOWN;
1927169689Skan    }
1928169689Skan  else
1929169689Skan    {
1930169689Skan      int u;
1931169689Skan
1932169689Skan      sbitmap_zero (workset);
1933169689Skan      if ((u = find_max_asap (g, scc)) >= 0)
1934169689Skan	SET_BIT (workset, u);
1935169689Skan      dir = BOTTOMUP;
1936169689Skan    }
1937169689Skan
1938169689Skan  sbitmap_zero (zero_bitmap);
1939169689Skan  while (!sbitmap_equal (workset, zero_bitmap))
1940169689Skan    {
1941169689Skan      int v;
1942169689Skan      ddg_node_ptr v_node;
1943169689Skan      sbitmap v_node_preds;
1944169689Skan      sbitmap v_node_succs;
1945169689Skan
1946169689Skan      if (dir == TOPDOWN)
1947169689Skan	{
1948169689Skan	  while (!sbitmap_equal (workset, zero_bitmap))
1949169689Skan	    {
1950169689Skan	      v = find_max_hv_min_mob (g, workset);
1951169689Skan	      v_node = &g->nodes[v];
1952169689Skan	      node_order[pos++] = v;
1953169689Skan	      v_node_succs = NODE_SUCCESSORS (v_node);
1954169689Skan	      sbitmap_a_and_b (tmp, v_node_succs, scc);
1955169689Skan
1956169689Skan	      /* Don't consider the already ordered successors again.  */
1957169689Skan	      sbitmap_difference (tmp, tmp, nodes_ordered);
1958169689Skan	      sbitmap_a_or_b (workset, workset, tmp);
1959169689Skan	      RESET_BIT (workset, v);
1960169689Skan	      SET_BIT (nodes_ordered, v);
1961169689Skan	    }
1962169689Skan	  dir = BOTTOMUP;
1963169689Skan	  sbitmap_zero (predecessors);
1964169689Skan	  find_predecessors (predecessors, g, nodes_ordered);
1965169689Skan	  sbitmap_a_and_b (workset, predecessors, scc);
1966169689Skan	}
1967169689Skan      else
1968169689Skan	{
1969169689Skan	  while (!sbitmap_equal (workset, zero_bitmap))
1970169689Skan	    {
1971169689Skan	      v = find_max_dv_min_mob (g, workset);
1972169689Skan	      v_node = &g->nodes[v];
1973169689Skan	      node_order[pos++] = v;
1974169689Skan	      v_node_preds = NODE_PREDECESSORS (v_node);
1975169689Skan	      sbitmap_a_and_b (tmp, v_node_preds, scc);
1976169689Skan
1977169689Skan	      /* Don't consider the already ordered predecessors again.  */
1978169689Skan	      sbitmap_difference (tmp, tmp, nodes_ordered);
1979169689Skan	      sbitmap_a_or_b (workset, workset, tmp);
1980169689Skan	      RESET_BIT (workset, v);
1981169689Skan	      SET_BIT (nodes_ordered, v);
1982169689Skan	    }
1983169689Skan	  dir = TOPDOWN;
1984169689Skan	  sbitmap_zero (successors);
1985169689Skan	  find_successors (successors, g, nodes_ordered);
1986169689Skan	  sbitmap_a_and_b (workset, successors, scc);
1987169689Skan	}
1988169689Skan    }
1989169689Skan  sbitmap_free (tmp);
1990169689Skan  sbitmap_free (workset);
1991169689Skan  sbitmap_free (zero_bitmap);
1992169689Skan  sbitmap_free (predecessors);
1993169689Skan  sbitmap_free (successors);
1994169689Skan  return pos;
1995169689Skan}
1996169689Skan
1997169689Skan
1998169689Skan/* This page contains functions for manipulating partial-schedules during
1999169689Skan   modulo scheduling.  */
2000169689Skan
2001169689Skan/* Create a partial schedule and allocate a memory to hold II rows.  */
2002169689Skan
2003169689Skanstatic partial_schedule_ptr
2004169689Skancreate_partial_schedule (int ii, ddg_ptr g, int history)
2005169689Skan{
2006169689Skan  partial_schedule_ptr ps = XNEW (struct partial_schedule);
2007169689Skan  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2008169689Skan  ps->ii = ii;
2009169689Skan  ps->history = history;
2010169689Skan  ps->min_cycle = INT_MAX;
2011169689Skan  ps->max_cycle = INT_MIN;
2012169689Skan  ps->g = g;
2013169689Skan
2014169689Skan  return ps;
2015169689Skan}
2016169689Skan
2017169689Skan/* Free the PS_INSNs in rows array of the given partial schedule.
2018169689Skan   ??? Consider caching the PS_INSN's.  */
2019169689Skanstatic void
2020169689Skanfree_ps_insns (partial_schedule_ptr ps)
2021169689Skan{
2022169689Skan  int i;
2023169689Skan
2024169689Skan  for (i = 0; i < ps->ii; i++)
2025169689Skan    {
2026169689Skan      while (ps->rows[i])
2027169689Skan	{
2028169689Skan	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2029169689Skan
2030169689Skan	  free (ps->rows[i]);
2031169689Skan	  ps->rows[i] = ps_insn;
2032169689Skan	}
2033169689Skan      ps->rows[i] = NULL;
2034169689Skan    }
2035169689Skan}
2036169689Skan
2037169689Skan/* Free all the memory allocated to the partial schedule.  */
2038169689Skan
2039169689Skanstatic void
2040169689Skanfree_partial_schedule (partial_schedule_ptr ps)
2041169689Skan{
2042169689Skan  if (!ps)
2043169689Skan    return;
2044169689Skan  free_ps_insns (ps);
2045169689Skan  free (ps->rows);
2046169689Skan  free (ps);
2047169689Skan}
2048169689Skan
2049169689Skan/* Clear the rows array with its PS_INSNs, and create a new one with
2050169689Skan   NEW_II rows.  */
2051169689Skan
2052169689Skanstatic void
2053169689Skanreset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2054169689Skan{
2055169689Skan  if (!ps)
2056169689Skan    return;
2057169689Skan  free_ps_insns (ps);
2058169689Skan  if (new_ii == ps->ii)
2059169689Skan    return;
2060169689Skan  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2061169689Skan						 * sizeof (ps_insn_ptr));
2062169689Skan  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2063169689Skan  ps->ii = new_ii;
2064169689Skan  ps->min_cycle = INT_MAX;
2065169689Skan  ps->max_cycle = INT_MIN;
2066169689Skan}
2067169689Skan
2068169689Skan/* Prints the partial schedule as an ii rows array, for each rows
2069169689Skan   print the ids of the insns in it.  */
2070169689Skanvoid
2071169689Skanprint_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2072169689Skan{
2073169689Skan  int i;
2074169689Skan
2075169689Skan  for (i = 0; i < ps->ii; i++)
2076169689Skan    {
2077169689Skan      ps_insn_ptr ps_i = ps->rows[i];
2078169689Skan
2079169689Skan      fprintf (dump, "\n[CYCLE %d ]: ", i);
2080169689Skan      while (ps_i)
2081169689Skan	{
2082169689Skan	  fprintf (dump, "%d, ",
2083169689Skan		   INSN_UID (ps_i->node->insn));
2084169689Skan	  ps_i = ps_i->next_in_row;
2085169689Skan	}
2086169689Skan    }
2087169689Skan}
2088169689Skan
2089169689Skan/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2090169689Skanstatic ps_insn_ptr
2091169689Skancreate_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2092169689Skan{
2093169689Skan  ps_insn_ptr ps_i = XNEW (struct ps_insn);
2094169689Skan
2095169689Skan  ps_i->node = node;
2096169689Skan  ps_i->next_in_row = NULL;
2097169689Skan  ps_i->prev_in_row = NULL;
2098169689Skan  ps_i->row_rest_count = rest_count;
2099169689Skan  ps_i->cycle = cycle;
2100169689Skan
2101169689Skan  return ps_i;
2102169689Skan}
2103169689Skan
2104169689Skan
2105169689Skan/* Removes the given PS_INSN from the partial schedule.  Returns false if the
2106169689Skan   node is not found in the partial schedule, else returns true.  */
2107169689Skanstatic bool
2108169689Skanremove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2109169689Skan{
2110169689Skan  int row;
2111169689Skan
2112169689Skan  if (!ps || !ps_i)
2113169689Skan    return false;
2114169689Skan
2115169689Skan  row = SMODULO (ps_i->cycle, ps->ii);
2116169689Skan  if (! ps_i->prev_in_row)
2117169689Skan    {
2118169689Skan      if (ps_i != ps->rows[row])
2119169689Skan	return false;
2120169689Skan
2121169689Skan      ps->rows[row] = ps_i->next_in_row;
2122169689Skan      if (ps->rows[row])
2123169689Skan	ps->rows[row]->prev_in_row = NULL;
2124169689Skan    }
2125169689Skan  else
2126169689Skan    {
2127169689Skan      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2128169689Skan      if (ps_i->next_in_row)
2129169689Skan	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2130169689Skan    }
2131169689Skan  free (ps_i);
2132169689Skan  return true;
2133169689Skan}
2134169689Skan
2135169689Skan/* Unlike what literature describes for modulo scheduling (which focuses
2136169689Skan   on VLIW machines) the order of the instructions inside a cycle is
2137169689Skan   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2138169689Skan   where the current instruction should go relative to the already
2139169689Skan   scheduled instructions in the given cycle.  Go over these
2140169689Skan   instructions and find the first possible column to put it in.  */
2141169689Skanstatic bool
2142169689Skanps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2143169689Skan		     sbitmap must_precede, sbitmap must_follow)
2144169689Skan{
2145169689Skan  ps_insn_ptr next_ps_i;
2146169689Skan  ps_insn_ptr first_must_follow = NULL;
2147169689Skan  ps_insn_ptr last_must_precede = NULL;
2148169689Skan  int row;
2149169689Skan
2150169689Skan  if (! ps_i)
2151169689Skan    return false;
2152169689Skan
2153169689Skan  row = SMODULO (ps_i->cycle, ps->ii);
2154169689Skan
2155169689Skan  /* Find the first must follow and the last must precede
2156169689Skan     and insert the node immediately after the must precede
2157169689Skan     but make sure that it there is no must follow after it.  */
2158169689Skan  for (next_ps_i = ps->rows[row];
2159169689Skan       next_ps_i;
2160169689Skan       next_ps_i = next_ps_i->next_in_row)
2161169689Skan    {
2162169689Skan      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2163169689Skan	  && ! first_must_follow)
2164169689Skan        first_must_follow = next_ps_i;
2165169689Skan      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2166169689Skan        {
2167169689Skan          /* If we have already met a node that must follow, then
2168169689Skan	     there is no possible column.  */
2169169689Skan  	  if (first_must_follow)
2170169689Skan            return false;
2171169689Skan	  else
2172169689Skan            last_must_precede = next_ps_i;
2173169689Skan        }
2174169689Skan    }
2175169689Skan
2176169689Skan  /* Now insert the node after INSERT_AFTER_PSI.  */
2177169689Skan
2178169689Skan  if (! last_must_precede)
2179169689Skan    {
2180169689Skan      ps_i->next_in_row = ps->rows[row];
2181169689Skan      ps_i->prev_in_row = NULL;
2182169689Skan      if (ps_i->next_in_row)
2183169689Skan    	ps_i->next_in_row->prev_in_row = ps_i;
2184169689Skan      ps->rows[row] = ps_i;
2185169689Skan    }
2186169689Skan  else
2187169689Skan    {
2188169689Skan      ps_i->next_in_row = last_must_precede->next_in_row;
2189169689Skan      last_must_precede->next_in_row = ps_i;
2190169689Skan      ps_i->prev_in_row = last_must_precede;
2191169689Skan      if (ps_i->next_in_row)
2192169689Skan        ps_i->next_in_row->prev_in_row = ps_i;
2193169689Skan    }
2194169689Skan
2195169689Skan  return true;
2196169689Skan}
2197169689Skan
2198169689Skan/* Advances the PS_INSN one column in its current row; returns false
2199169689Skan   in failure and true in success.  Bit N is set in MUST_FOLLOW if
2200169689Skan   the node with cuid N must be come after the node pointed to by
2201169689Skan   PS_I when scheduled in the same cycle.  */
2202169689Skanstatic int
2203169689Skanps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2204169689Skan			sbitmap must_follow)
2205169689Skan{
2206169689Skan  ps_insn_ptr prev, next;
2207169689Skan  int row;
2208169689Skan  ddg_node_ptr next_node;
2209169689Skan
2210169689Skan  if (!ps || !ps_i)
2211169689Skan    return false;
2212169689Skan
2213169689Skan  row = SMODULO (ps_i->cycle, ps->ii);
2214169689Skan
2215169689Skan  if (! ps_i->next_in_row)
2216169689Skan    return false;
2217169689Skan
2218169689Skan  next_node = ps_i->next_in_row->node;
2219169689Skan
2220169689Skan  /* Check if next_in_row is dependent on ps_i, both having same sched
2221169689Skan     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
2222169689Skan  if (TEST_BIT (must_follow, next_node->cuid))
2223169689Skan    return false;
2224169689Skan
2225169689Skan  /* Advance PS_I over its next_in_row in the doubly linked list.  */
2226169689Skan  prev = ps_i->prev_in_row;
2227169689Skan  next = ps_i->next_in_row;
2228169689Skan
2229169689Skan  if (ps_i == ps->rows[row])
2230169689Skan    ps->rows[row] = next;
2231169689Skan
2232169689Skan  ps_i->next_in_row = next->next_in_row;
2233169689Skan
2234169689Skan  if (next->next_in_row)
2235169689Skan    next->next_in_row->prev_in_row = ps_i;
2236169689Skan
2237169689Skan  next->next_in_row = ps_i;
2238169689Skan  ps_i->prev_in_row = next;
2239169689Skan
2240169689Skan  next->prev_in_row = prev;
2241169689Skan  if (prev)
2242169689Skan    prev->next_in_row = next;
2243169689Skan
2244169689Skan  return true;
2245169689Skan}
2246169689Skan
2247169689Skan/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2248169689Skan   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
2249169689Skan   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2250169689Skan   before/after (respectively) the node pointed to by PS_I when scheduled
2251169689Skan   in the same cycle.  */
2252169689Skanstatic ps_insn_ptr
2253169689Skanadd_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2254169689Skan		sbitmap must_precede, sbitmap must_follow)
2255169689Skan{
2256169689Skan  ps_insn_ptr ps_i;
2257169689Skan  int rest_count = 1;
2258169689Skan  int row = SMODULO (cycle, ps->ii);
2259169689Skan
2260169689Skan  if (ps->rows[row]
2261169689Skan      && ps->rows[row]->row_rest_count >= issue_rate)
2262169689Skan    return NULL;
2263169689Skan
2264169689Skan  if (ps->rows[row])
2265169689Skan    rest_count += ps->rows[row]->row_rest_count;
2266169689Skan
2267169689Skan  ps_i = create_ps_insn (node, rest_count, cycle);
2268169689Skan
2269169689Skan  /* Finds and inserts PS_I according to MUST_FOLLOW and
2270169689Skan     MUST_PRECEDE.  */
2271169689Skan  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2272169689Skan    {
2273169689Skan      free (ps_i);
2274169689Skan      return NULL;
2275169689Skan    }
2276169689Skan
2277169689Skan  return ps_i;
2278169689Skan}
2279169689Skan
2280169689Skan/* Advance time one cycle.  Assumes DFA is being used.  */
2281169689Skanstatic void
2282169689Skanadvance_one_cycle (void)
2283169689Skan{
2284169689Skan  if (targetm.sched.dfa_pre_cycle_insn)
2285169689Skan    state_transition (curr_state,
2286169689Skan		      targetm.sched.dfa_pre_cycle_insn ());
2287169689Skan
2288169689Skan  state_transition (curr_state, NULL);
2289169689Skan
2290169689Skan  if (targetm.sched.dfa_post_cycle_insn)
2291169689Skan    state_transition (curr_state,
2292169689Skan		      targetm.sched.dfa_post_cycle_insn ());
2293169689Skan}
2294169689Skan
2295169689Skan/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2296169689Skan   the number of cycles according to DFA that the kernel fits in,
2297169689Skan   we use this to check if we done well with SMS after we add
2298169689Skan   register moves.  In some cases register moves overhead makes
2299169689Skan   it even worse than the original loop.  We want SMS to be performed
2300169689Skan   when it gives less cycles after register moves are added.  */
2301169689Skanstatic int
2302169689Skankernel_number_of_cycles (rtx first_insn, rtx last_insn)
2303169689Skan{
2304169689Skan  int cycles = 0;
2305169689Skan  rtx insn;
2306169689Skan  int can_issue_more = issue_rate;
2307169689Skan
2308169689Skan  state_reset (curr_state);
2309169689Skan
2310169689Skan  for (insn = first_insn;
2311169689Skan       insn != NULL_RTX && insn != last_insn;
2312169689Skan       insn = NEXT_INSN (insn))
2313169689Skan    {
2314169689Skan      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2315169689Skan	continue;
2316169689Skan
2317169689Skan      /* Check if there is room for the current insn.  */
2318169689Skan      if (!can_issue_more || state_dead_lock_p (curr_state))
2319169689Skan	{
2320169689Skan	  cycles ++;
2321169689Skan	  advance_one_cycle ();
2322169689Skan	  can_issue_more = issue_rate;
2323169689Skan	}
2324169689Skan
2325169689Skan	/* Update the DFA state and return with failure if the DFA found
2326169689Skan	   recource conflicts.  */
2327169689Skan      if (state_transition (curr_state, insn) >= 0)
2328169689Skan	{
2329169689Skan	  cycles ++;
2330169689Skan	  advance_one_cycle ();
2331169689Skan	  can_issue_more = issue_rate;
2332169689Skan	}
2333169689Skan
2334169689Skan      if (targetm.sched.variable_issue)
2335169689Skan	can_issue_more =
2336169689Skan	  targetm.sched.variable_issue (sched_dump, sched_verbose,
2337169689Skan					insn, can_issue_more);
2338169689Skan      /* A naked CLOBBER or USE generates no instruction, so don't
2339169689Skan	 let them consume issue slots.  */
2340169689Skan      else if (GET_CODE (PATTERN (insn)) != USE
2341169689Skan	       && GET_CODE (PATTERN (insn)) != CLOBBER)
2342169689Skan	can_issue_more--;
2343169689Skan    }
2344169689Skan  return cycles;
2345169689Skan}
2346169689Skan
2347169689Skan/* Checks if PS has resource conflicts according to DFA, starting from
2348169689Skan   FROM cycle to TO cycle; returns true if there are conflicts and false
2349169689Skan   if there are no conflicts.  Assumes DFA is being used.  */
2350169689Skanstatic int
2351169689Skanps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2352169689Skan{
2353169689Skan  int cycle;
2354169689Skan
2355169689Skan  state_reset (curr_state);
2356169689Skan
2357169689Skan  for (cycle = from; cycle <= to; cycle++)
2358169689Skan    {
2359169689Skan      ps_insn_ptr crr_insn;
2360169689Skan      /* Holds the remaining issue slots in the current row.  */
2361169689Skan      int can_issue_more = issue_rate;
2362169689Skan
2363169689Skan      /* Walk through the DFA for the current row.  */
2364169689Skan      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2365169689Skan	   crr_insn;
2366169689Skan	   crr_insn = crr_insn->next_in_row)
2367169689Skan	{
2368169689Skan	  rtx insn = crr_insn->node->insn;
2369169689Skan
2370169689Skan	  if (!INSN_P (insn))
2371169689Skan	    continue;
2372169689Skan
2373169689Skan	  /* Check if there is room for the current insn.  */
2374169689Skan	  if (!can_issue_more || state_dead_lock_p (curr_state))
2375169689Skan	    return true;
2376169689Skan
2377169689Skan	  /* Update the DFA state and return with failure if the DFA found
2378169689Skan	     recource conflicts.  */
2379169689Skan	  if (state_transition (curr_state, insn) >= 0)
2380169689Skan	    return true;
2381169689Skan
2382169689Skan	  if (targetm.sched.variable_issue)
2383169689Skan	    can_issue_more =
2384169689Skan	      targetm.sched.variable_issue (sched_dump, sched_verbose,
2385169689Skan					    insn, can_issue_more);
2386169689Skan	  /* A naked CLOBBER or USE generates no instruction, so don't
2387169689Skan	     let them consume issue slots.  */
2388169689Skan	  else if (GET_CODE (PATTERN (insn)) != USE
2389169689Skan		   && GET_CODE (PATTERN (insn)) != CLOBBER)
2390169689Skan	    can_issue_more--;
2391169689Skan	}
2392169689Skan
2393169689Skan      /* Advance the DFA to the next cycle.  */
2394169689Skan      advance_one_cycle ();
2395169689Skan    }
2396169689Skan  return false;
2397169689Skan}
2398169689Skan
2399169689Skan/* Checks if the given node causes resource conflicts when added to PS at
2400169689Skan   cycle C.  If not the node is added to PS and returned; otherwise zero
2401169689Skan   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2402169689Skan   cuid N must be come before/after (respectively) the node pointed to by
2403169689Skan   PS_I when scheduled in the same cycle.  */
2404169689Skanps_insn_ptr
2405169689Skanps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2406169689Skan   			     int c, sbitmap must_precede,
2407169689Skan			     sbitmap must_follow)
2408169689Skan{
2409169689Skan  int has_conflicts = 0;
2410169689Skan  ps_insn_ptr ps_i;
2411169689Skan
2412169689Skan  /* First add the node to the PS, if this succeeds check for
2413169689Skan     conflicts, trying different issue slots in the same row.  */
2414169689Skan  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2415169689Skan    return NULL; /* Failed to insert the node at the given cycle.  */
2416169689Skan
2417169689Skan  has_conflicts = ps_has_conflicts (ps, c, c)
2418169689Skan		  || (ps->history > 0
2419169689Skan		      && ps_has_conflicts (ps,
2420169689Skan					   c - ps->history,
2421169689Skan					   c + ps->history));
2422169689Skan
2423169689Skan  /* Try different issue slots to find one that the given node can be
2424169689Skan     scheduled in without conflicts.  */
2425169689Skan  while (has_conflicts)
2426169689Skan    {
2427169689Skan      if (! ps_insn_advance_column (ps, ps_i, must_follow))
2428169689Skan	break;
2429169689Skan      has_conflicts = ps_has_conflicts (ps, c, c)
2430169689Skan		      || (ps->history > 0
2431169689Skan			  && ps_has_conflicts (ps,
2432169689Skan					       c - ps->history,
2433169689Skan					       c + ps->history));
2434169689Skan    }
2435169689Skan
2436169689Skan  if (has_conflicts)
2437169689Skan    {
2438169689Skan      remove_node_from_ps (ps, ps_i);
2439169689Skan      return NULL;
2440169689Skan    }
2441169689Skan
2442169689Skan  ps->min_cycle = MIN (ps->min_cycle, c);
2443169689Skan  ps->max_cycle = MAX (ps->max_cycle, c);
2444169689Skan  return ps_i;
2445169689Skan}
2446169689Skan
2447169689Skan/* Rotate the rows of PS such that insns scheduled at time
2448169689Skan   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
2449169689Skanvoid
2450169689Skanrotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2451169689Skan{
2452169689Skan  int i, row, backward_rotates;
2453169689Skan  int last_row = ps->ii - 1;
2454169689Skan
2455169689Skan  if (start_cycle == 0)
2456169689Skan    return;
2457169689Skan
2458169689Skan  backward_rotates = SMODULO (start_cycle, ps->ii);
2459169689Skan
2460169689Skan  /* Revisit later and optimize this into a single loop.  */
2461169689Skan  for (i = 0; i < backward_rotates; i++)
2462169689Skan    {
2463169689Skan      ps_insn_ptr first_row = ps->rows[0];
2464169689Skan
2465169689Skan      for (row = 0; row < last_row; row++)
2466169689Skan	ps->rows[row] = ps->rows[row+1];
2467169689Skan
2468169689Skan      ps->rows[last_row] = first_row;
2469169689Skan    }
2470169689Skan
2471169689Skan  ps->max_cycle -= start_cycle;
2472169689Skan  ps->min_cycle -= start_cycle;
2473169689Skan}
2474169689Skan
2475169689Skan/* Remove the node N from the partial schedule PS; because we restart the DFA
2476169689Skan   each time we want to check for resource conflicts; this is equivalent to
2477169689Skan   unscheduling the node N.  */
2478169689Skanstatic bool
2479169689Skanps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2480169689Skan{
2481169689Skan  ps_insn_ptr ps_i;
2482169689Skan  int row = SMODULO (SCHED_TIME (n), ps->ii);
2483169689Skan
2484169689Skan  if (row < 0 || row > ps->ii)
2485169689Skan    return false;
2486169689Skan
2487169689Skan  for (ps_i = ps->rows[row];
2488169689Skan       ps_i &&  ps_i->node != n;
2489169689Skan       ps_i = ps_i->next_in_row);
2490169689Skan  if (!ps_i)
2491169689Skan    return false;
2492169689Skan
2493169689Skan  return remove_node_from_ps (ps, ps_i);
2494169689Skan}
2495169689Skan#endif /* INSN_SCHEDULING */
2496169689Skan
2497169689Skanstatic bool
2498169689Skangate_handle_sms (void)
2499169689Skan{
2500169689Skan  return (optimize > 0 && flag_modulo_sched);
2501169689Skan}
2502169689Skan
2503169689Skan
2504169689Skan/* Run instruction scheduler.  */
2505169689Skan/* Perform SMS module scheduling.  */
2506169689Skanstatic unsigned int
2507169689Skanrest_of_handle_sms (void)
2508169689Skan{
2509169689Skan#ifdef INSN_SCHEDULING
2510169689Skan  basic_block bb;
2511169689Skan
2512169689Skan  /* We want to be able to create new pseudos.  */
2513169689Skan  no_new_pseudos = 0;
2514169689Skan  /* Collect loop information to be used in SMS.  */
2515169689Skan  cfg_layout_initialize (CLEANUP_UPDATE_LIFE);
2516169689Skan  sms_schedule ();
2517169689Skan
2518169689Skan  /* Update the life information, because we add pseudos.  */
2519169689Skan  max_regno = max_reg_num ();
2520169689Skan  allocate_reg_info (max_regno, FALSE, FALSE);
2521169689Skan  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
2522169689Skan                    (PROP_DEATH_NOTES
2523169689Skan                     | PROP_REG_INFO
2524169689Skan                     | PROP_KILL_DEAD_CODE
2525169689Skan                     | PROP_SCAN_DEAD_CODE));
2526169689Skan
2527169689Skan  no_new_pseudos = 1;
2528169689Skan
2529169689Skan  /* Finalize layout changes.  */
2530169689Skan  FOR_EACH_BB (bb)
2531169689Skan    if (bb->next_bb != EXIT_BLOCK_PTR)
2532169689Skan      bb->aux = bb->next_bb;
2533169689Skan  cfg_layout_finalize ();
2534169689Skan  free_dominance_info (CDI_DOMINATORS);
2535169689Skan#endif /* INSN_SCHEDULING */
2536169689Skan  return 0;
2537169689Skan}
2538169689Skan
2539169689Skanstruct tree_opt_pass pass_sms =
2540169689Skan{
2541169689Skan  "sms",                                /* name */
2542169689Skan  gate_handle_sms,                      /* gate */
2543169689Skan  rest_of_handle_sms,                   /* execute */
2544169689Skan  NULL,                                 /* sub */
2545169689Skan  NULL,                                 /* next */
2546169689Skan  0,                                    /* static_pass_number */
2547169689Skan  TV_SMS,                               /* tv_id */
2548169689Skan  0,                                    /* properties_required */
2549169689Skan  0,                                    /* properties_provided */
2550169689Skan  0,                                    /* properties_destroyed */
2551169689Skan  TODO_dump_func,                       /* todo_flags_start */
2552169689Skan  TODO_dump_func |
2553169689Skan  TODO_ggc_collect,                     /* todo_flags_finish */
2554169689Skan  'm'                                   /* letter */
2555169689Skan};
2556169689Skan
2557