SIMachineScheduler.h revision 360784
1//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
17#include "SIInstrInfo.h"
18#include "llvm/CodeGen/MachineBasicBlock.h"
19#include "llvm/CodeGen/MachineScheduler.h"
20#include "llvm/CodeGen/RegisterPressure.h"
21#include "llvm/CodeGen/ScheduleDAG.h"
22#include <cassert>
23#include <cstdint>
24#include <map>
25#include <memory>
26#include <set>
27#include <vector>
28
29namespace llvm {
30
31enum SIScheduleCandReason {
32  NoCand,
33  RegUsage,
34  Latency,
35  Successor,
36  Depth,
37  NodeOrder
38};
39
40struct SISchedulerCandidate {
41  // The reason for this candidate.
42  SIScheduleCandReason Reason = NoCand;
43
44  // Set of reasons that apply to multiple candidates.
45  uint32_t RepeatReasonSet = 0;
46
47  SISchedulerCandidate() = default;
48
49  bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50  void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
51};
52
53class SIScheduleDAGMI;
54class SIScheduleBlockCreator;
55
56enum SIScheduleBlockLinkKind {
57  NoData,
58  Data
59};
60
61class SIScheduleBlock {
62  SIScheduleDAGMI *DAG;
63  SIScheduleBlockCreator *BC;
64
65  std::vector<SUnit*> SUnits;
66  std::map<unsigned, unsigned> NodeNum2Index;
67  std::vector<SUnit*> TopReadySUs;
68  std::vector<SUnit*> ScheduledSUnits;
69
70  /// The top of the unscheduled zone.
71  IntervalPressure TopPressure;
72  RegPressureTracker TopRPTracker;
73
74  // Pressure: number of said class of registers needed to
75  // store the live virtual and real registers.
76  // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77  // Pressure of additional registers required inside the block.
78  std::vector<unsigned> InternalAdditionnalPressure;
79  // Pressure of input and output registers
80  std::vector<unsigned> LiveInPressure;
81  std::vector<unsigned> LiveOutPressure;
82  // Registers required by the block, and outputs.
83  // We do track only virtual registers.
84  // Note that some registers are not 32 bits,
85  // and thus the pressure is not equal
86  // to the number of live registers.
87  std::set<unsigned> LiveInRegs;
88  std::set<unsigned> LiveOutRegs;
89
90  bool Scheduled = false;
91  bool HighLatencyBlock = false;
92
93  std::vector<unsigned> HasLowLatencyNonWaitedParent;
94
95  // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
96  unsigned ID;
97
98  std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
99  // All blocks successors, and the kind of link
100  std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
101  unsigned NumHighLatencySuccessors = 0;
102
103public:
104  SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
105                  unsigned ID):
106    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
107
108  ~SIScheduleBlock() = default;
109
110  unsigned getID() const { return ID; }
111
112  /// Functions for Block construction.
113  void addUnit(SUnit *SU);
114
115  // When all SUs have been added.
116  void finalizeUnits();
117
118  // Add block pred, which has instruction predecessor of SU.
119  void addPred(SIScheduleBlock *Pred);
120  void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
121
122  const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
123  ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
124    getSuccs() const { return Succs; }
125
126  unsigned Height;  // Maximum topdown path length to block without outputs
127  unsigned Depth;   // Maximum bottomup path length to block without inputs
128
129  unsigned getNumHighLatencySuccessors() const {
130    return NumHighLatencySuccessors;
131  }
132
133  bool isHighLatencyBlock() { return HighLatencyBlock; }
134
135  // This is approximative.
136  // Ideally should take into accounts some instructions (rcp, etc)
137  // are 4 times slower.
138  int getCost() { return SUnits.size(); }
139
140  // The block Predecessors and Successors must be all registered
141  // before fastSchedule().
142  // Fast schedule with no particular requirement.
143  void fastSchedule();
144
145  std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
146
147  // Complete schedule that will try to minimize reg pressure and
148  // low latencies, and will fill liveins and liveouts.
149  // Needs all MIs to be grouped between BeginBlock and EndBlock.
150  // The MIs can be moved after the scheduling,
151  // it is just used to allow correct track of live registers.
152  void schedule(MachineBasicBlock::iterator BeginBlock,
153                MachineBasicBlock::iterator EndBlock);
154
155  bool isScheduled() { return Scheduled; }
156
157  // Needs the block to be scheduled inside
158  // TODO: find a way to compute it.
159  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
160    return InternalAdditionnalPressure;
161  }
162
163  std::set<unsigned> &getInRegs() { return LiveInRegs; }
164  std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
165
166  void printDebug(bool Full);
167
168private:
169  struct SISchedCandidate : SISchedulerCandidate {
170    // The best SUnit candidate.
171    SUnit *SU = nullptr;
172
173    unsigned SGPRUsage;
174    unsigned VGPRUsage;
175    bool IsLowLatency;
176    unsigned LowLatencyOffset;
177    bool HasLowLatencyNonWaitedParent;
178
179    SISchedCandidate() = default;
180
181    bool isValid() const { return SU; }
182
183    // Copy the status of another candidate without changing policy.
184    void setBest(SISchedCandidate &Best) {
185      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
186      SU = Best.SU;
187      Reason = Best.Reason;
188      SGPRUsage = Best.SGPRUsage;
189      VGPRUsage = Best.VGPRUsage;
190      IsLowLatency = Best.IsLowLatency;
191      LowLatencyOffset = Best.LowLatencyOffset;
192      HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
193    }
194  };
195
196  void undoSchedule();
197
198  void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
199  void releaseSucc(SUnit *SU, SDep *SuccEdge);
200  // InOrOutBlock: restrict to links pointing inside the block (true),
201  // or restrict to links pointing outside the block (false).
202  void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
203
204  void nodeScheduled(SUnit *SU);
205  void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
206  void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207  SUnit* pickNode();
208  void traceCandidate(const SISchedCandidate &Cand);
209  void initRegPressure(MachineBasicBlock::iterator BeginBlock,
210                       MachineBasicBlock::iterator EndBlock);
211};
212
213struct SIScheduleBlocks {
214  std::vector<SIScheduleBlock*> Blocks;
215  std::vector<int> TopDownIndex2Block;
216  std::vector<int> TopDownBlock2Index;
217};
218
219enum SISchedulerBlockCreatorVariant {
220  LatenciesAlone,
221  LatenciesGrouped,
222  LatenciesAlonePlusConsecutive
223};
224
225class SIScheduleBlockCreator {
226  SIScheduleDAGMI *DAG;
227  // unique_ptr handles freeing memory for us.
228  std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
229  std::map<SISchedulerBlockCreatorVariant,
230           SIScheduleBlocks> Blocks;
231  std::vector<SIScheduleBlock*> CurrentBlocks;
232  std::vector<int> Node2CurrentBlock;
233
234  // Topological sort
235  // Maps topological index to the node number.
236  std::vector<int> TopDownIndex2Block;
237  std::vector<int> TopDownBlock2Index;
238  std::vector<int> BottomUpIndex2Block;
239
240  // 0 -> Color not given.
241  // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242  // Above -> Other groups.
243  int NextReservedID;
244  int NextNonReservedID;
245  std::vector<int> CurrentColoring;
246  std::vector<int> CurrentTopDownReservedDependencyColoring;
247  std::vector<int> CurrentBottomUpReservedDependencyColoring;
248
249public:
250  SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
251
252  SIScheduleBlocks
253  getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
254
255  bool isSUInBlock(SUnit *SU, unsigned ID);
256
257private:
258  // Give a Reserved color to every high latency.
259  void colorHighLatenciesAlone();
260
261  // Create groups of high latencies with a Reserved color.
262  void colorHighLatenciesGroups();
263
264  // Compute coloring for topdown and bottom traversals with
265  // different colors depending on dependencies on Reserved colors.
266  void colorComputeReservedDependencies();
267
268  // Give color to all non-colored SUs according to Reserved groups dependencies.
269  void colorAccordingToReservedDependencies();
270
271  // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
272  // The new colors are computed according to the dependencies on the other blocks
273  // formed with colorAccordingToReservedDependencies.
274  void colorEndsAccordingToDependencies();
275
276  // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
277  void colorForceConsecutiveOrderInGroup();
278
279  // Merge Constant loads that have all their users into another group to the group.
280  // (TODO: else if all their users depend on the same group, put them there)
281  void colorMergeConstantLoadsNextGroup();
282
283  // Merge SUs that have all their users into another group to the group
284  void colorMergeIfPossibleNextGroup();
285
286  // Merge SUs that have all their users into another group to the group,
287  // but only for Reserved groups.
288  void colorMergeIfPossibleNextGroupOnlyForReserved();
289
290  // Merge SUs that have all their users into another group to the group,
291  // but only if the group is no more than a few SUs.
292  void colorMergeIfPossibleSmallGroupsToNextGroup();
293
294  // Divides Blocks with important size.
295  // Idea of implementation: attribute new colors depending on topdown and
296  // bottom up links to other blocks.
297  void cutHugeBlocks();
298
299  // Put in one group all instructions with no users in this scheduling region
300  // (we'd want these groups be at the end).
301  void regroupNoUserInstructions();
302
303  // Give Reserved color to export instructions
304  void colorExports();
305
306  void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
307
308  void topologicalSort();
309
310  void scheduleInsideBlocks();
311
312  void fillStats();
313};
314
315enum SISchedulerBlockSchedulerVariant {
316  BlockLatencyRegUsage,
317  BlockRegUsageLatency,
318  BlockRegUsage
319};
320
321class SIScheduleBlockScheduler {
322  SIScheduleDAGMI *DAG;
323  SISchedulerBlockSchedulerVariant Variant;
324  std::vector<SIScheduleBlock*> Blocks;
325
326  std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
327  std::set<unsigned> LiveRegs;
328  // Num of schedulable unscheduled blocks reading the register.
329  std::map<unsigned, unsigned> LiveRegsConsumers;
330
331  std::vector<unsigned> LastPosHighLatencyParentScheduled;
332  int LastPosWaitedHighLatency;
333
334  std::vector<SIScheduleBlock*> BlocksScheduled;
335  unsigned NumBlockScheduled;
336  std::vector<SIScheduleBlock*> ReadyBlocks;
337
338  unsigned VregCurrentUsage;
339  unsigned SregCurrentUsage;
340
341  // Currently is only approximation.
342  unsigned maxVregUsage;
343  unsigned maxSregUsage;
344
345  std::vector<unsigned> BlockNumPredsLeft;
346  std::vector<unsigned> BlockNumSuccsLeft;
347
348public:
349  SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
350                           SISchedulerBlockSchedulerVariant Variant,
351                           SIScheduleBlocks BlocksStruct);
352  ~SIScheduleBlockScheduler() = default;
353
354  std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
355
356  unsigned getVGPRUsage() { return maxVregUsage; }
357  unsigned getSGPRUsage() { return maxSregUsage; }
358
359private:
360  struct SIBlockSchedCandidate : SISchedulerCandidate {
361    // The best Block candidate.
362    SIScheduleBlock *Block = nullptr;
363
364    bool IsHighLatency;
365    int VGPRUsageDiff;
366    unsigned NumSuccessors;
367    unsigned NumHighLatencySuccessors;
368    unsigned LastPosHighLatParentScheduled;
369    unsigned Height;
370
371    SIBlockSchedCandidate() = default;
372
373    bool isValid() const { return Block; }
374
375    // Copy the status of another candidate without changing policy.
376    void setBest(SIBlockSchedCandidate &Best) {
377      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
378      Block = Best.Block;
379      Reason = Best.Reason;
380      IsHighLatency = Best.IsHighLatency;
381      VGPRUsageDiff = Best.VGPRUsageDiff;
382      NumSuccessors = Best.NumSuccessors;
383      NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
384      LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
385      Height = Best.Height;
386    }
387  };
388
389  bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
390                           SIBlockSchedCandidate &TryCand);
391  bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
392                            SIBlockSchedCandidate &TryCand);
393  SIScheduleBlock *pickBlock();
394
395  void addLiveRegs(std::set<unsigned> &Regs);
396  void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
397  void releaseBlockSuccs(SIScheduleBlock *Parent);
398  void blockScheduled(SIScheduleBlock *Block);
399
400  // Check register pressure change
401  // by scheduling a block with these LiveIn and LiveOut.
402  std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
403                                       std::set<unsigned> &OutRegs);
404
405  void schedule();
406};
407
408struct SIScheduleBlockResult {
409  std::vector<unsigned> SUs;
410  unsigned MaxSGPRUsage;
411  unsigned MaxVGPRUsage;
412};
413
414class SIScheduler {
415  SIScheduleDAGMI *DAG;
416  SIScheduleBlockCreator BlockCreator;
417
418public:
419  SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
420
421  ~SIScheduler() = default;
422
423  struct SIScheduleBlockResult
424  scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
425                  SISchedulerBlockSchedulerVariant ScheduleVariant);
426};
427
428class SIScheduleDAGMI final : public ScheduleDAGMILive {
429  const SIInstrInfo *SITII;
430  const SIRegisterInfo *SITRI;
431
432  std::vector<SUnit> SUnitsLinksBackup;
433
434  // For moveLowLatencies. After all Scheduling variants are tested.
435  std::vector<unsigned> ScheduledSUnits;
436  std::vector<unsigned> ScheduledSUnitsInv;
437
438  unsigned VGPRSetID;
439  unsigned SGPRSetID;
440
441public:
442  SIScheduleDAGMI(MachineSchedContext *C);
443
444  ~SIScheduleDAGMI() override;
445
446  // Entry point for the schedule.
447  void schedule() override;
448
449  // To init Block's RPTracker.
450  void initRPTracker(RegPressureTracker &RPTracker) {
451    RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
452  }
453
454  MachineBasicBlock *getBB() { return BB; }
455  MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
456  MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
457  LiveIntervals *getLIS() { return LIS; }
458  MachineRegisterInfo *getMRI() { return &MRI; }
459  const TargetRegisterInfo *getTRI() { return TRI; }
460  ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
461  SUnit& getEntrySU() { return EntrySU; }
462  SUnit& getExitSU() { return ExitSU; }
463
464  void restoreSULinksLeft();
465
466  template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
467                                                     _Iterator End,
468                                                     unsigned &VgprUsage,
469                                                     unsigned &SgprUsage);
470
471  std::set<unsigned> getInRegs() {
472    std::set<unsigned> InRegs;
473    for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
474      InRegs.insert(RegMaskPair.RegUnit);
475    }
476    return InRegs;
477  }
478
479  std::set<unsigned> getOutRegs() {
480    std::set<unsigned> OutRegs;
481    for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
482      OutRegs.insert(RegMaskPair.RegUnit);
483    }
484    return OutRegs;
485  };
486
487  unsigned getVGPRSetID() const { return VGPRSetID; }
488  unsigned getSGPRSetID() const { return SGPRSetID; }
489
490private:
491  void topologicalSort();
492  // After scheduling is done, improve low latency placements.
493  void moveLowLatencies();
494
495public:
496  // Some stats for scheduling inside blocks.
497  std::vector<unsigned> IsLowLatencySU;
498  std::vector<unsigned> LowLatencyOffset;
499  std::vector<unsigned> IsHighLatencySU;
500  // Topological sort
501  // Maps topological index to the node number.
502  std::vector<int> TopDownIndex2SU;
503  std::vector<int> BottomUpIndex2SU;
504};
505
506} // end namespace llvm
507
508#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
509