1249259Sdim//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim/// \file 11249259Sdim/// 12249259Sdim/// Instructions can use indirect addressing to index the register file as if it 13249259Sdim/// were memory. This pass lowers RegisterLoad and RegisterStore instructions 14249259Sdim/// to either a COPY or a MOV that uses indirect addressing. 15249259Sdim// 16249259Sdim//===----------------------------------------------------------------------===// 17249259Sdim 18249259Sdim#include "AMDGPU.h" 19249259Sdim#include "R600InstrInfo.h" 20249259Sdim#include "R600MachineFunctionInfo.h" 21249259Sdim#include "llvm/CodeGen/MachineFunction.h" 22249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 23249259Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 24249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25249259Sdim#include "llvm/Support/Debug.h" 26249259Sdim 27249259Sdimusing namespace llvm; 28249259Sdim 29249259Sdimnamespace { 30249259Sdim 31249259Sdimclass AMDGPUIndirectAddressingPass : public MachineFunctionPass { 32249259Sdim 33249259Sdimprivate: 34249259Sdim static char ID; 35249259Sdim const AMDGPUInstrInfo *TII; 36249259Sdim 37249259Sdim bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const; 38249259Sdim 39249259Sdimpublic: 40249259Sdim AMDGPUIndirectAddressingPass(TargetMachine &tm) : 41249259Sdim MachineFunctionPass(ID), 42249259Sdim TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo())) 43249259Sdim { } 44249259Sdim 45249259Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 46249259Sdim 47249259Sdim const char *getPassName() const { return "R600 Handle indirect addressing"; } 48249259Sdim 49249259Sdim}; 50249259Sdim 51249259Sdim} // End anonymous namespace 52249259Sdim 53249259Sdimchar AMDGPUIndirectAddressingPass::ID = 0; 54249259Sdim 55249259SdimFunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) { 56249259Sdim return new AMDGPUIndirectAddressingPass(tm); 57249259Sdim} 58249259Sdim 59249259Sdimbool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { 60249259Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 61249259Sdim 62249259Sdim int IndirectBegin = TII->getIndirectIndexBegin(MF); 63249259Sdim int IndirectEnd = TII->getIndirectIndexEnd(MF); 64249259Sdim 65249259Sdim if (IndirectBegin == -1) { 66249259Sdim // No indirect addressing, we can skip this pass 67249259Sdim assert(IndirectEnd == -1); 68249259Sdim return false; 69249259Sdim } 70249259Sdim 71249259Sdim // The map keeps track of the indirect address that is represented by 72249259Sdim // each virtual register. The key is the register and the value is the 73249259Sdim // indirect address it uses. 74249259Sdim std::map<unsigned, unsigned> RegisterAddressMap; 75249259Sdim 76249259Sdim // First pass - Lower all of the RegisterStore instructions and track which 77249259Sdim // registers are live. 78249259Sdim for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 79249259Sdim BB != BB_E; ++BB) { 80249259Sdim // This map keeps track of the current live indirect registers. 81249259Sdim // The key is the address and the value is the register 82249259Sdim std::map<unsigned, unsigned> LiveAddressRegisterMap; 83249259Sdim MachineBasicBlock &MBB = *BB; 84249259Sdim 85249259Sdim for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); 86249259Sdim I != MBB.end(); I = Next) { 87249259Sdim Next = llvm::next(I); 88249259Sdim MachineInstr &MI = *I; 89249259Sdim 90249259Sdim if (!TII->isRegisterStore(MI)) { 91249259Sdim continue; 92249259Sdim } 93249259Sdim 94249259Sdim // Lower RegisterStore 95249259Sdim 96249259Sdim unsigned RegIndex = MI.getOperand(2).getImm(); 97249259Sdim unsigned Channel = MI.getOperand(3).getImm(); 98249259Sdim unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); 99249259Sdim const TargetRegisterClass *IndirectStoreRegClass = 100249259Sdim TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg()); 101249259Sdim 102249259Sdim if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { 103249259Sdim // Direct register access. 104249259Sdim unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); 105249259Sdim 106249259Sdim BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg) 107249259Sdim .addOperand(MI.getOperand(0)); 108249259Sdim 109249259Sdim RegisterAddressMap[DstReg] = Address; 110249259Sdim LiveAddressRegisterMap[Address] = DstReg; 111249259Sdim } else { 112249259Sdim // Indirect register access. 113249259Sdim MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I, 114249259Sdim MI.getOperand(0).getReg(), // Value 115249259Sdim Address, 116249259Sdim MI.getOperand(1).getReg()); // Offset 117249259Sdim for (int i = IndirectBegin; i <= IndirectEnd; ++i) { 118249259Sdim unsigned Addr = TII->calculateIndirectAddress(i, Channel); 119249259Sdim unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); 120249259Sdim MOV.addReg(DstReg, RegState::Define | RegState::Implicit); 121249259Sdim RegisterAddressMap[DstReg] = Addr; 122249259Sdim LiveAddressRegisterMap[Addr] = DstReg; 123249259Sdim } 124249259Sdim } 125249259Sdim MI.eraseFromParent(); 126249259Sdim } 127249259Sdim 128249259Sdim // Update the live-ins of the succesor blocks 129249259Sdim for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(), 130249259Sdim SuccEnd = MBB.succ_end(); 131249259Sdim SuccEnd != Succ; ++Succ) { 132249259Sdim std::map<unsigned, unsigned>::const_iterator Key, KeyEnd; 133249259Sdim for (Key = LiveAddressRegisterMap.begin(), 134249259Sdim KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) { 135249259Sdim (*Succ)->addLiveIn(Key->second); 136249259Sdim } 137249259Sdim } 138249259Sdim } 139249259Sdim 140249259Sdim // Second pass - Lower the RegisterLoad instructions 141249259Sdim for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 142249259Sdim BB != BB_E; ++BB) { 143249259Sdim // Key is the address and the value is the register 144249259Sdim std::map<unsigned, unsigned> LiveAddressRegisterMap; 145249259Sdim MachineBasicBlock &MBB = *BB; 146249259Sdim 147249259Sdim MachineBasicBlock::livein_iterator LI = MBB.livein_begin(); 148249259Sdim while (LI != MBB.livein_end()) { 149249259Sdim std::vector<unsigned> PhiRegisters; 150249259Sdim 151249259Sdim // Make sure this live in is used for indirect addressing 152249259Sdim if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) { 153249259Sdim ++LI; 154249259Sdim continue; 155249259Sdim } 156249259Sdim 157249259Sdim unsigned Address = RegisterAddressMap[*LI]; 158249259Sdim LiveAddressRegisterMap[Address] = *LI; 159249259Sdim PhiRegisters.push_back(*LI); 160249259Sdim 161249259Sdim // Check if there are other live in registers which map to the same 162249259Sdim // indirect address. 163249259Sdim for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI), 164249259Sdim LE = MBB.livein_end(); 165249259Sdim LJ != LE; ++LJ) { 166249259Sdim unsigned Reg = *LJ; 167249259Sdim if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) { 168249259Sdim continue; 169249259Sdim } 170249259Sdim 171249259Sdim if (RegisterAddressMap[Reg] == Address) { 172249259Sdim PhiRegisters.push_back(Reg); 173249259Sdim } 174249259Sdim } 175249259Sdim 176249259Sdim if (PhiRegisters.size() == 1) { 177249259Sdim // We don't need to insert a Phi instruction, so we can just add the 178249259Sdim // registers to the live list for the block. 179249259Sdim LiveAddressRegisterMap[Address] = *LI; 180249259Sdim MBB.removeLiveIn(*LI); 181249259Sdim } else { 182249259Sdim // We need to insert a PHI, because we have the same address being 183249259Sdim // written in multiple predecessor blocks. 184249259Sdim const TargetRegisterClass *PhiDstClass = 185249259Sdim TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin())); 186249259Sdim unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass); 187249259Sdim MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(), 188249259Sdim MBB.findDebugLoc(MBB.begin()), 189249259Sdim TII->get(AMDGPU::PHI), PhiDstReg); 190249259Sdim 191249259Sdim for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(), 192249259Sdim RE = PhiRegisters.end(); 193249259Sdim RI != RE; ++RI) { 194249259Sdim unsigned Reg = *RI; 195249259Sdim MachineInstr *DefInst = MRI.getVRegDef(Reg); 196249259Sdim assert(DefInst); 197249259Sdim MachineBasicBlock *RegBlock = DefInst->getParent(); 198249259Sdim Phi.addReg(Reg); 199249259Sdim Phi.addMBB(RegBlock); 200249259Sdim MBB.removeLiveIn(Reg); 201249259Sdim } 202249259Sdim RegisterAddressMap[PhiDstReg] = Address; 203249259Sdim LiveAddressRegisterMap[Address] = PhiDstReg; 204249259Sdim } 205249259Sdim LI = MBB.livein_begin(); 206249259Sdim } 207249259Sdim 208249259Sdim for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); 209249259Sdim I != MBB.end(); I = Next) { 210249259Sdim Next = llvm::next(I); 211249259Sdim MachineInstr &MI = *I; 212249259Sdim 213249259Sdim if (!TII->isRegisterLoad(MI)) { 214249259Sdim if (MI.getOpcode() == AMDGPU::PHI) { 215249259Sdim continue; 216249259Sdim } 217249259Sdim // Check for indirect register defs 218249259Sdim for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands(); 219249259Sdim OpIdx < NumOperands; ++OpIdx) { 220249259Sdim MachineOperand &MO = MI.getOperand(OpIdx); 221249259Sdim if (MO.isReg() && MO.isDef() && 222249259Sdim RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) { 223249259Sdim unsigned Reg = MO.getReg(); 224249259Sdim unsigned LiveAddress = RegisterAddressMap[Reg]; 225249259Sdim // Chain the live-ins 226249259Sdim if (LiveAddressRegisterMap.find(LiveAddress) != 227249259Sdim RegisterAddressMap.end()) { 228249259Sdim MI.addOperand(MachineOperand::CreateReg( 229249259Sdim LiveAddressRegisterMap[LiveAddress], 230249259Sdim false, // isDef 231249259Sdim true, // isImp 232249259Sdim true)); // isKill 233249259Sdim } 234249259Sdim LiveAddressRegisterMap[LiveAddress] = Reg; 235249259Sdim } 236249259Sdim } 237249259Sdim continue; 238249259Sdim } 239249259Sdim 240249259Sdim const TargetRegisterClass *SuperIndirectRegClass = 241249259Sdim TII->getSuperIndirectRegClass(); 242249259Sdim const TargetRegisterClass *IndirectLoadRegClass = 243249259Sdim TII->getIndirectAddrLoadRegClass(); 244249259Sdim unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass); 245249259Sdim 246249259Sdim unsigned RegIndex = MI.getOperand(2).getImm(); 247249259Sdim unsigned Channel = MI.getOperand(3).getImm(); 248249259Sdim unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); 249249259Sdim 250249259Sdim if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { 251249259Sdim // Direct register access 252249259Sdim unsigned Reg = LiveAddressRegisterMap[Address]; 253249259Sdim unsigned AddrReg = IndirectLoadRegClass->getRegister(Address); 254249259Sdim 255249259Sdim if (regHasExplicitDef(MRI, Reg)) { 256249259Sdim // If the register we are reading from has an explicit def, then that 257249259Sdim // means it was written via a direct register access (i.e. COPY 258249259Sdim // or other instruction that doesn't use indirect addressing). In 259249259Sdim // this case we know where the value has been stored, so we can just 260249259Sdim // issue a copy. 261249259Sdim BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), 262249259Sdim MI.getOperand(0).getReg()) 263249259Sdim .addReg(Reg); 264249259Sdim } else { 265249259Sdim // If the register we are reading has an implicit def, then that 266249259Sdim // means it was written by an indirect register access (i.e. An 267249259Sdim // instruction that uses indirect addressing. 268249259Sdim BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), 269249259Sdim MI.getOperand(0).getReg()) 270249259Sdim .addReg(AddrReg) 271249259Sdim .addReg(Reg, RegState::Implicit); 272249259Sdim } 273249259Sdim } else { 274249259Sdim // Indirect register access 275249259Sdim 276249259Sdim // Note on REQ_SEQUENCE instructons: You can't actually use the register 277249259Sdim // it defines unless you have an instruction that takes the defined 278249259Sdim // register class as an operand. 279249259Sdim 280249259Sdim MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I), 281249259Sdim TII->get(AMDGPU::REG_SEQUENCE), 282249259Sdim IndirectReg); 283249259Sdim for (int i = IndirectBegin; i <= IndirectEnd; ++i) { 284249259Sdim unsigned Addr = TII->calculateIndirectAddress(i, Channel); 285249259Sdim if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) { 286249259Sdim continue; 287249259Sdim } 288249259Sdim unsigned Reg = LiveAddressRegisterMap[Addr]; 289249259Sdim 290249259Sdim // We only need to use REG_SEQUENCE for explicit defs, since the 291249259Sdim // register coalescer won't do anything with the implicit defs. 292249259Sdim if (!regHasExplicitDef(MRI, Reg)) { 293249259Sdim continue; 294249259Sdim } 295249259Sdim 296249259Sdim // Insert a REQ_SEQUENCE instruction to force the register allocator 297249259Sdim // to allocate the virtual register to the correct physical register. 298249259Sdim Sequence.addReg(LiveAddressRegisterMap[Addr]); 299249259Sdim Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr)); 300249259Sdim } 301249259Sdim MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I, 302249259Sdim MI.getOperand(0).getReg(), // Value 303249259Sdim Address, 304249259Sdim MI.getOperand(1).getReg()); // Offset 305249259Sdim 306249259Sdim 307249259Sdim 308249259Sdim Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill); 309249259Sdim Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit); 310249259Sdim 311249259Sdim } 312249259Sdim MI.eraseFromParent(); 313249259Sdim } 314249259Sdim } 315249259Sdim return false; 316249259Sdim} 317249259Sdim 318249259Sdimbool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI, 319249259Sdim unsigned Reg) const { 320249259Sdim MachineInstr *DefInstr = MRI.getVRegDef(Reg); 321249259Sdim 322249259Sdim if (!DefInstr) { 323249259Sdim return false; 324249259Sdim } 325249259Sdim 326249259Sdim if (DefInstr->getOpcode() == AMDGPU::PHI) { 327249259Sdim bool Explicit = false; 328249259Sdim for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(), 329249259Sdim E = DefInstr->operands_end(); 330249259Sdim I != E; ++I) { 331249259Sdim const MachineOperand &MO = *I; 332249259Sdim if (!MO.isReg() || MO.isDef()) { 333249259Sdim continue; 334249259Sdim } 335249259Sdim 336249259Sdim Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg()); 337249259Sdim } 338249259Sdim return Explicit; 339249259Sdim } 340249259Sdim 341249259Sdim return DefInstr->getOperand(0).isReg() && 342249259Sdim DefInstr->getOperand(0).getReg() == Reg; 343249259Sdim} 344