SIFixupVectorISel.cpp revision 360784
//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// SIFixupVectorISel pass cleans up post ISEL Vector issues.
/// Currently this will convert GLOBAL_{LOAD|STORE}_*
/// and GLOBAL_Atomic_* instructions into their _SADDR variants,
/// feeding the sreg into the saddr field of the new instruction.
/// We currently handle a REG_SEQUENCE feeding the vaddr
/// and decompose it into a base and index.
///
/// Transform:
/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                        %24:vgpr_32, %19:sreg_64_xexec
/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
/// %11:vreg_64 = COPY %16:vreg_64
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
26/// 27//===----------------------------------------------------------------------===// 28// 29 30#include "AMDGPU.h" 31#include "AMDGPUSubtarget.h" 32#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/CodeGen/MachineFunctionPass.h" 35#include "llvm/CodeGen/MachineInstrBuilder.h" 36#include "llvm/CodeGen/MachineRegisterInfo.h" 37#include "llvm/IR/Function.h" 38#include "llvm/IR/LLVMContext.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Target/TargetMachine.h" 41#define DEBUG_TYPE "si-fixup-vector-isel" 42 43using namespace llvm; 44 45static cl::opt<bool> EnableGlobalSGPRAddr( 46 "amdgpu-enable-global-sgpr-addr", 47 cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), 48 cl::init(false)); 49 50STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities"); 51STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted"); 52 53namespace { 54 55class SIFixupVectorISel : public MachineFunctionPass { 56public: 57 static char ID; 58 59public: 60 SIFixupVectorISel() : MachineFunctionPass(ID) { 61 initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry()); 62 } 63 64 bool runOnMachineFunction(MachineFunction &MF) override; 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.setPreservesCFG(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70}; 71 72} // End anonymous namespace. 
73 74INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE, 75 "SI Fixup Vector ISel", false, false) 76 77char SIFixupVectorISel::ID = 0; 78 79char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID; 80 81FunctionPass *llvm::createSIFixupVectorISelPass() { 82 return new SIFixupVectorISel(); 83} 84 85static bool findSRegBaseAndIndex(MachineOperand *Op, 86 unsigned &BaseReg, 87 unsigned &IndexReg, 88 MachineRegisterInfo &MRI, 89 const SIRegisterInfo *TRI) { 90 SmallVector<MachineOperand *, 8> Worklist; 91 Worklist.push_back(Op); 92 while (!Worklist.empty()) { 93 MachineOperand *WOp = Worklist.pop_back_val(); 94 if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg())) 95 continue; 96 MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg()); 97 switch (DefInst->getOpcode()) { 98 default: 99 continue; 100 case AMDGPU::COPY: 101 Worklist.push_back(&DefInst->getOperand(1)); 102 break; 103 case AMDGPU::REG_SEQUENCE: 104 if (DefInst->getNumOperands() != 5) 105 continue; 106 Worklist.push_back(&DefInst->getOperand(1)); 107 Worklist.push_back(&DefInst->getOperand(3)); 108 break; 109 case AMDGPU::V_ADD_I32_e64: 110 // The V_ADD_* and its analogous V_ADDCV_* are generated by 111 // a previous pass which lowered from an ADD_64_PSEUDO, 112 // which generates subregs to break up the 64 bit args. 113 if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister) 114 continue; 115 BaseReg = DefInst->getOperand(2).getReg(); 116 if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister) 117 continue; 118 IndexReg = DefInst->getOperand(3).getReg(); 119 // Chase the IndexReg. 120 MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg); 121 if (!MI || !MI->isCopy()) 122 continue; 123 // Make sure the reg class is 64 bit for Index. 124 // If the Index register is a subreg, we want it to reference 125 // a 64 bit register which we will use as the Index reg. 
126 const TargetRegisterClass *IdxRC, *BaseRC; 127 IdxRC = MRI.getRegClass(MI->getOperand(1).getReg()); 128 if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64) 129 continue; 130 IndexReg = MI->getOperand(1).getReg(); 131 // Chase the BaseReg. 132 MI = MRI.getUniqueVRegDef(BaseReg); 133 if (!MI || !MI->isCopy()) 134 continue; 135 // Make sure the register class is 64 bit for Base. 136 BaseReg = MI->getOperand(1).getReg(); 137 BaseRC = MRI.getRegClass(BaseReg); 138 if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64) 139 continue; 140 // Make sure Base is SReg and Index is VReg. 141 if (!TRI->isSGPRReg(MRI, BaseReg)) 142 return false; 143 if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg))) 144 return false; 145 // clear any killed flags on Index and Base regs, used later. 146 MRI.clearKillFlags(IndexReg); 147 MRI.clearKillFlags(BaseReg); 148 return true; 149 } 150 } 151 return false; 152} 153 154// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR. 155static bool fixupGlobalSaddr(MachineBasicBlock &MBB, 156 MachineFunction &MF, 157 MachineRegisterInfo &MRI, 158 const GCNSubtarget &ST, 159 const SIInstrInfo *TII, 160 const SIRegisterInfo *TRI) { 161 if (!EnableGlobalSGPRAddr) 162 return false; 163 bool FuncModified = false; 164 MachineBasicBlock::iterator I, Next; 165 for (I = MBB.begin(); I != MBB.end(); I = Next) { 166 Next = std::next(I); 167 MachineInstr &MI = *I; 168 int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode()); 169 if (NewOpcd < 0) 170 continue; 171 // Update our statistics on opportunities seen. 172 ++NumSGPRGlobalOccurs; 173 LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n'); 174 // Need a Base and Index or we cant transform to _SADDR. 175 unsigned BaseReg = 0; 176 unsigned IndexReg = 0; 177 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); 178 if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI)) 179 continue; 180 ++NumSGPRGlobalSaddrs; 181 FuncModified = true; 182 // Create the new _SADDR Memory instruction. 
183 bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr; 184 MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata); 185 MachineInstr *NewGlob = nullptr; 186 NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd)); 187 if (HasVdst) 188 NewGlob->addOperand(MF, MI.getOperand(0)); 189 NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false)); 190 if (VData) 191 NewGlob->addOperand(MF, *VData); 192 NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false)); 193 NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset)); 194 195 MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc); 196 // Atomics dont have a GLC, so omit the field if not there. 197 if (Glc) 198 NewGlob->addOperand(MF, *Glc); 199 200 MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc); 201 if (DLC) 202 NewGlob->addOperand(MF, *DLC); 203 204 NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc)); 205 // _D16 have an vdst_in operand, copy it in. 206 MachineOperand *VDstInOp = TII->getNamedOperand(MI, 207 AMDGPU::OpName::vdst_in); 208 if (VDstInOp) 209 NewGlob->addOperand(MF, *VDstInOp); 210 NewGlob->copyImplicitOps(MF, MI); 211 NewGlob->cloneMemRefs(MF, MI); 212 // Remove the old Global Memop instruction. 213 MI.eraseFromParent(); 214 LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n'); 215 } 216 return FuncModified; 217} 218 219bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) { 220 if (skipFunction(MF.getFunction())) 221 return false; 222 223 MachineRegisterInfo &MRI = MF.getRegInfo(); 224 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 225 const SIInstrInfo *TII = ST.getInstrInfo(); 226 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 227 228 bool FuncModified = false; 229 for (MachineBasicBlock &MBB : MF) { 230 // Cleanup missed Saddr opportunites from ISel. 
231 FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI); 232 } 233 return FuncModified; 234} 235