GCNILPSched.cpp revision 360784
1//===---------------------------- GCNILPSched.cpp - -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/ScheduleDAG.h"
14#include "llvm/Support/Debug.h"
15
16using namespace llvm;
17
18#define DEBUG_TYPE "machine-scheduler"
19
20namespace {
21
22class GCNILPScheduler {
23  struct Candidate : ilist_node<Candidate> {
24    SUnit *SU;
25
26    Candidate(SUnit *SU_)
27      : SU(SU_) {}
28  };
29
30  SpecificBumpPtrAllocator<Candidate> Alloc;
31  typedef simple_ilist<Candidate> Queue;
32  Queue PendingQueue;
33  Queue AvailQueue;
34  unsigned CurQueueId = 0;
35
36  std::vector<unsigned> SUNumbers;
37
38  /// CurCycle - The current scheduler state corresponds to this cycle.
39  unsigned CurCycle = 0;
40
41  unsigned getNodePriority(const SUnit *SU) const;
42
43  const SUnit *pickBest(const SUnit *left, const SUnit *right);
44  Candidate* pickCandidate();
45
46  void releasePending();
47  void advanceToCycle(unsigned NextCycle);
48  void releasePredecessors(const SUnit* SU);
49
50public:
51  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
52                                     const ScheduleDAG &DAG);
53};
54} // namespace
55
56/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
57/// Smaller number is the higher priority.
58static unsigned
59CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
60  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
61  if (SethiUllmanNumber != 0)
62    return SethiUllmanNumber;
63
64  unsigned Extra = 0;
65  for (const SDep &Pred : SU->Preds) {
66    if (Pred.isCtrl()) continue;  // ignore chain preds
67    SUnit *PredSU = Pred.getSUnit();
68    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
69    if (PredSethiUllman > SethiUllmanNumber) {
70      SethiUllmanNumber = PredSethiUllman;
71      Extra = 0;
72    }
73    else if (PredSethiUllman == SethiUllmanNumber)
74      ++Extra;
75  }
76
77  SethiUllmanNumber += Extra;
78
79  if (SethiUllmanNumber == 0)
80    SethiUllmanNumber = 1;
81
82  return SethiUllmanNumber;
83}
84
85// Lower priority means schedule further down. For bottom-up scheduling, lower
86// priority SUs are scheduled before higher priority SUs.
87unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
88  assert(SU->NodeNum < SUNumbers.size());
89  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
90    // If SU does not have a register use, i.e. it doesn't produce a value
91    // that would be consumed (e.g. store), then it terminates a chain of
92    // computation.  Give it a large SethiUllman number so it will be
93    // scheduled right before its predecessors that it doesn't lengthen
94    // their live ranges.
95    return 0xffff;
96
97  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
98    // If SU does not have a register def, schedule it close to its uses
99    // because it does not lengthen any live ranges.
100    return 0;
101
102  return SUNumbers[SU->NodeNum];
103}
104
105/// closestSucc - Returns the scheduled cycle of the successor which is
106/// closest to the current cycle.
107static unsigned closestSucc(const SUnit *SU) {
108  unsigned MaxHeight = 0;
109  for (const SDep &Succ : SU->Succs) {
110    if (Succ.isCtrl()) continue;  // ignore chain succs
111    unsigned Height = Succ.getSUnit()->getHeight();
112    // If there are bunch of CopyToRegs stacked up, they should be considered
113    // to be at the same position.
114    if (Height > MaxHeight)
115      MaxHeight = Height;
116  }
117  return MaxHeight;
118}
119
120/// calcMaxScratches - Returns an cost estimate of the worse case requirement
121/// for scratch registers, i.e. number of data dependencies.
122static unsigned calcMaxScratches(const SUnit *SU) {
123  unsigned Scratches = 0;
124  for (const SDep &Pred : SU->Preds) {
125    if (Pred.isCtrl()) continue;  // ignore chain preds
126    Scratches++;
127  }
128  return Scratches;
129}
130
131// Return -1 if left has higher priority, 1 if right has higher priority.
132// Return 0 if latency-based priority is equivalent.
133static int BUCompareLatency(const SUnit *left, const SUnit *right) {
134  // Scheduling an instruction that uses a VReg whose postincrement has not yet
135  // been scheduled will induce a copy. Model this as an extra cycle of latency.
136  int LHeight = (int)left->getHeight();
137  int RHeight = (int)right->getHeight();
138
139  // If either node is scheduling for latency, sort them by height/depth
140  // and latency.
141
142  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
143  // is enabled, grouping instructions by cycle, then its height is already
144  // covered so only its depth matters. We also reach this point if both stall
145  // but have the same height.
146  if (LHeight != RHeight)
147    return LHeight > RHeight ? 1 : -1;
148
149  int LDepth = left->getDepth();
150  int RDepth = right->getDepth();
151  if (LDepth != RDepth) {
152    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
153                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
154                      << ") depth " << RDepth << "\n");
155    return LDepth < RDepth ? 1 : -1;
156  }
157  if (left->Latency != right->Latency)
158    return left->Latency > right->Latency ? 1 : -1;
159
160  return 0;
161}
162
163const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
164{
165  // TODO: add register pressure lowering checks
166
167  bool const DisableSchedCriticalPath = false;
168  int MaxReorderWindow = 6;
169  if (!DisableSchedCriticalPath) {
170    int spread = (int)left->getDepth() - (int)right->getDepth();
171    if (std::abs(spread) > MaxReorderWindow) {
172      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
173                        << left->getDepth() << " != SU(" << right->NodeNum
174                        << "): " << right->getDepth() << "\n");
175      return left->getDepth() < right->getDepth() ? right : left;
176    }
177  }
178
179  bool const DisableSchedHeight = false;
180  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
181    int spread = (int)left->getHeight() - (int)right->getHeight();
182    if (std::abs(spread) > MaxReorderWindow)
183      return left->getHeight() > right->getHeight() ? right : left;
184  }
185
186  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
187  unsigned LPriority = getNodePriority(left);
188  unsigned RPriority = getNodePriority(right);
189
190  if (LPriority != RPriority)
191    return LPriority > RPriority ? right : left;
192
193  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
194  // e.g.
195  // t1 = op t2, c1
196  // t3 = op t4, c2
197  //
198  // and the following instructions are both ready.
199  // t2 = op c3
200  // t4 = op c4
201  //
202  // Then schedule t2 = op first.
203  // i.e.
204  // t4 = op c4
205  // t2 = op c3
206  // t1 = op t2, c1
207  // t3 = op t4, c2
208  //
209  // This creates more short live intervals.
210  unsigned LDist = closestSucc(left);
211  unsigned RDist = closestSucc(right);
212  if (LDist != RDist)
213    return LDist < RDist ? right : left;
214
215  // How many registers becomes live when the node is scheduled.
216  unsigned LScratch = calcMaxScratches(left);
217  unsigned RScratch = calcMaxScratches(right);
218  if (LScratch != RScratch)
219    return LScratch > RScratch ? right : left;
220
221  bool const DisableSchedCycles = false;
222  if (!DisableSchedCycles) {
223    int result = BUCompareLatency(left, right);
224    if (result != 0)
225      return result > 0 ? right : left;
226    return left;
227  }
228  else {
229    if (left->getHeight() != right->getHeight())
230      return (left->getHeight() > right->getHeight()) ? right : left;
231
232    if (left->getDepth() != right->getDepth())
233      return (left->getDepth() < right->getDepth()) ? right : left;
234  }
235
236  assert(left->NodeQueueId && right->NodeQueueId &&
237        "NodeQueueId cannot be zero");
238  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
239}
240
241GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
242  if (AvailQueue.empty())
243    return nullptr;
244  auto Best = AvailQueue.begin();
245  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
246    auto NewBestSU = pickBest(Best->SU, I->SU);
247    if (NewBestSU != Best->SU) {
248      assert(NewBestSU == I->SU);
249      Best = I;
250    }
251  }
252  return &*Best;
253}
254
255void GCNILPScheduler::releasePending() {
256  // Check to see if any of the pending instructions are ready to issue.  If
257  // so, add them to the available queue.
258  for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
259    auto &C = *I++;
260    if (C.SU->getHeight() <= CurCycle) {
261      PendingQueue.remove(C);
262      AvailQueue.push_back(C);
263      C.SU->NodeQueueId = CurQueueId++;
264    }
265  }
266}
267
268/// Move the scheduler state forward by the specified number of Cycles.
269void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
270  if (NextCycle <= CurCycle)
271    return;
272  CurCycle = NextCycle;
273  releasePending();
274}
275
276void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
277  for (const auto &PredEdge : SU->Preds) {
278    auto PredSU = PredEdge.getSUnit();
279    if (PredEdge.isWeak())
280      continue;
281    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
282
283    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
284
285    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
286      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
287  }
288}
289
290std::vector<const SUnit*>
291GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
292                          const ScheduleDAG &DAG) {
293  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
294
295  std::vector<SUnit> SUSavedCopy;
296  SUSavedCopy.resize(SUnits.size());
297
298  // we cannot save only those fields we touch: some of them are private
299  // so save units verbatim: this assumes SUnit should have value semantics
300  for (const SUnit &SU : SUnits)
301    SUSavedCopy[SU.NodeNum] = SU;
302
303  SUNumbers.assign(SUnits.size(), 0);
304  for (const SUnit &SU : SUnits)
305    CalcNodeSethiUllmanNumber(&SU, SUNumbers);
306
307  for (auto SU : BotRoots) {
308    AvailQueue.push_back(
309      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
310  }
311  releasePredecessors(&DAG.ExitSU);
312
313  std::vector<const SUnit*> Schedule;
314  Schedule.reserve(SUnits.size());
315  while (true) {
316    if (AvailQueue.empty() && !PendingQueue.empty()) {
317      auto EarliestSU = std::min_element(
318        PendingQueue.begin(), PendingQueue.end(),
319        [=](const Candidate& C1, const Candidate& C2) {
320        return C1.SU->getHeight() < C2.SU->getHeight();
321      })->SU;
322      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
323    }
324    if (AvailQueue.empty())
325      break;
326
327    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
328                         "Ready queue:";
329               for (auto &C
330                    : AvailQueue) dbgs()
331               << ' ' << C.SU->NodeNum;
332               dbgs() << '\n';);
333
334    auto C = pickCandidate();
335    assert(C);
336    AvailQueue.remove(*C);
337    auto SU = C->SU;
338    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
339
340    advanceToCycle(SU->getHeight());
341
342    releasePredecessors(SU);
343    Schedule.push_back(SU);
344    SU->isScheduled = true;
345  }
346  assert(SUnits.size() == Schedule.size());
347
348  std::reverse(Schedule.begin(), Schedule.end());
349
350  // restore units
351  for (auto &SU : SUnits)
352    SU = SUSavedCopy[SU.NodeNum];
353
354  return Schedule;
355}
356
357namespace llvm {
358std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
359                                              const ScheduleDAG &DAG) {
360  GCNILPScheduler S;
361  return S.schedule(BotRoots, DAG);
362}
363}
364