SIPreAllocateWWMRegs.cpp revision 360784
1//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Pass to pre-allocate WWM registers
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUSubtarget.h"
16#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17#include "SIInstrInfo.h"
18#include "SIMachineFunctionInfo.h"
19#include "SIRegisterInfo.h"
20#include "llvm/ADT/PostOrderIterator.h"
21#include "llvm/CodeGen/LiveInterval.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/LiveRegMatrix.h"
24#include "llvm/CodeGen/MachineDominators.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/RegisterClassInfo.h"
27#include "llvm/CodeGen/VirtRegMap.h"
28#include "llvm/InitializePasses.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
33
34namespace {
35
36class SIPreAllocateWWMRegs : public MachineFunctionPass {
37private:
38  const SIInstrInfo *TII;
39  const SIRegisterInfo *TRI;
40  MachineRegisterInfo *MRI;
41  LiveIntervals *LIS;
42  LiveRegMatrix *Matrix;
43  VirtRegMap *VRM;
44  RegisterClassInfo RegClassInfo;
45
46  std::vector<unsigned> RegsToRewrite;
47
48public:
49  static char ID;
50
51  SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
52    initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
53  }
54
55  bool runOnMachineFunction(MachineFunction &MF) override;
56
57  void getAnalysisUsage(AnalysisUsage &AU) const override {
58    AU.addRequired<LiveIntervals>();
59    AU.addPreserved<LiveIntervals>();
60    AU.addRequired<VirtRegMap>();
61    AU.addRequired<LiveRegMatrix>();
62    AU.addPreserved<SlotIndexes>();
63    AU.setPreservesCFG();
64    MachineFunctionPass::getAnalysisUsage(AU);
65  }
66
67private:
68  bool processDef(MachineOperand &MO);
69  void rewriteRegs(MachineFunction &MF);
70};
71
72} // End anonymous namespace.
73
74INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
75                "SI Pre-allocate WWM Registers", false, false)
76INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
77INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
78INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
79INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
80                "SI Pre-allocate WWM Registers", false, false)
81
82char SIPreAllocateWWMRegs::ID = 0;
83
84char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
85
86FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
87  return new SIPreAllocateWWMRegs();
88}
89
90bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
91  if (!MO.isReg())
92    return false;
93
94  Register Reg = MO.getReg();
95
96  if (!TRI->isVGPR(*MRI, Reg))
97    return false;
98
99  if (Register::isPhysicalRegister(Reg))
100    return false;
101
102  if (VRM->hasPhys(Reg))
103    return false;
104
105  LiveInterval &LI = LIS->getInterval(Reg);
106
107  for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
108    if (!MRI->isPhysRegUsed(PhysReg) &&
109        Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
110      Matrix->assign(LI, PhysReg);
111      assert(PhysReg != 0);
112      RegsToRewrite.push_back(Reg);
113      return true;
114    }
115  }
116
117  llvm_unreachable("physreg not found for WWM expression");
118  return false;
119}
120
121void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
122  for (MachineBasicBlock &MBB : MF) {
123    for (MachineInstr &MI : MBB) {
124      for (MachineOperand &MO : MI.operands()) {
125        if (!MO.isReg())
126          continue;
127
128        const Register VirtReg = MO.getReg();
129        if (Register::isPhysicalRegister(VirtReg))
130          continue;
131
132        if (!VRM->hasPhys(VirtReg))
133          continue;
134
135        Register PhysReg = VRM->getPhys(VirtReg);
136        const unsigned SubReg = MO.getSubReg();
137        if (SubReg != 0) {
138          PhysReg = TRI->getSubReg(PhysReg, SubReg);
139          MO.setSubReg(0);
140        }
141
142        MO.setReg(PhysReg);
143        MO.setIsRenamable(false);
144      }
145    }
146  }
147
148  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
149
150  for (unsigned Reg : RegsToRewrite) {
151    LIS->removeInterval(Reg);
152
153    const Register PhysReg = VRM->getPhys(Reg);
154    assert(PhysReg != 0);
155    MFI->ReserveWWMRegister(PhysReg);
156  }
157
158  RegsToRewrite.clear();
159
160  // Update the set of reserved registers to include WWM ones.
161  MRI->freezeReservedRegs(MF);
162}
163
164bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
165  LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
166
167  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
168
169  TII = ST.getInstrInfo();
170  TRI = &TII->getRegisterInfo();
171  MRI = &MF.getRegInfo();
172
173  LIS = &getAnalysis<LiveIntervals>();
174  Matrix = &getAnalysis<LiveRegMatrix>();
175  VRM = &getAnalysis<VirtRegMap>();
176
177  RegClassInfo.runOnMachineFunction(MF);
178
179  bool RegsAssigned = false;
180
181  // We use a reverse post-order traversal of the control-flow graph to
182  // guarantee that we visit definitions in dominance order. Since WWM
183  // expressions are guaranteed to never involve phi nodes, and we can only
184  // escape WWM through the special WWM instruction, this means that this is a
185  // perfect elimination order, so we can never do any better.
186  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
187
188  for (MachineBasicBlock *MBB : RPOT) {
189    bool InWWM = false;
190    for (MachineInstr &MI : *MBB) {
191      if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
192          MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
193        RegsAssigned |= processDef(MI.getOperand(0));
194
195      if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
196        LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
197        InWWM = true;
198        continue;
199      }
200
201      if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
202        LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
203        InWWM = false;
204      }
205
206      if (!InWWM)
207        continue;
208
209      LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
210
211      for (MachineOperand &DefOpnd : MI.defs()) {
212        RegsAssigned |= processDef(DefOpnd);
213      }
214    }
215  }
216
217  if (!RegsAssigned)
218    return false;
219
220  rewriteRegs(MF);
221  return true;
222}
223