//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//===----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim/// \file
11249259Sdim///
12249259Sdim/// Instructions can use indirect addressing to index the register file as if it
13249259Sdim/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
14249259Sdim/// to either a COPY or a MOV that uses indirect addressing.
15249259Sdim//
16249259Sdim//===----------------------------------------------------------------------===//
17249259Sdim
18249259Sdim#include "AMDGPU.h"
19249259Sdim#include "R600InstrInfo.h"
20249259Sdim#include "R600MachineFunctionInfo.h"
21249259Sdim#include "llvm/CodeGen/MachineFunction.h"
22249259Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
23249259Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
24249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
25249259Sdim#include "llvm/Support/Debug.h"
26249259Sdim
27249259Sdimusing namespace llvm;
28249259Sdim
29249259Sdimnamespace {
30249259Sdim
31249259Sdimclass AMDGPUIndirectAddressingPass : public MachineFunctionPass {
32249259Sdim
33249259Sdimprivate:
34249259Sdim  static char ID;
35249259Sdim  const AMDGPUInstrInfo *TII;
36249259Sdim
37249259Sdim  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
38249259Sdim
39249259Sdimpublic:
40249259Sdim  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
41249259Sdim    MachineFunctionPass(ID),
42249259Sdim    TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
43249259Sdim    { }
44249259Sdim
45249259Sdim  virtual bool runOnMachineFunction(MachineFunction &MF);
46249259Sdim
47249259Sdim  const char *getPassName() const { return "R600 Handle indirect addressing"; }
48249259Sdim
49249259Sdim};
50249259Sdim
51249259Sdim} // End anonymous namespace
52249259Sdim
53249259Sdimchar AMDGPUIndirectAddressingPass::ID = 0;
54249259Sdim
55249259SdimFunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
56249259Sdim  return new AMDGPUIndirectAddressingPass(tm);
57249259Sdim}
58249259Sdim
59249259Sdimbool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
60249259Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
61249259Sdim
62249259Sdim  int IndirectBegin = TII->getIndirectIndexBegin(MF);
63249259Sdim  int IndirectEnd = TII->getIndirectIndexEnd(MF);
64249259Sdim
65249259Sdim  if (IndirectBegin == -1) {
66249259Sdim    // No indirect addressing, we can skip this pass
67249259Sdim    assert(IndirectEnd == -1);
68249259Sdim    return false;
69249259Sdim  }
70249259Sdim
71249259Sdim  // The map keeps track of the indirect address that is represented by
72249259Sdim  // each virtual register. The key is the register and the value is the
73249259Sdim  // indirect address it uses.
74249259Sdim  std::map<unsigned, unsigned> RegisterAddressMap;
75249259Sdim
76249259Sdim  // First pass - Lower all of the RegisterStore instructions and track which
77249259Sdim  // registers are live.
78249259Sdim  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
79249259Sdim                                                      BB != BB_E; ++BB) {
80249259Sdim    // This map keeps track of the current live indirect registers.
81249259Sdim    // The key is the address and the value is the register
82249259Sdim    std::map<unsigned, unsigned> LiveAddressRegisterMap;
83249259Sdim    MachineBasicBlock &MBB = *BB;
84249259Sdim
85249259Sdim    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
86249259Sdim                               I != MBB.end(); I = Next) {
87249259Sdim      Next = llvm::next(I);
88249259Sdim      MachineInstr &MI = *I;
89249259Sdim
90249259Sdim      if (!TII->isRegisterStore(MI)) {
91249259Sdim        continue;
92249259Sdim      }
93249259Sdim
94249259Sdim      // Lower RegisterStore
95249259Sdim
96249259Sdim      unsigned RegIndex = MI.getOperand(2).getImm();
97249259Sdim      unsigned Channel = MI.getOperand(3).getImm();
98249259Sdim      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
99249259Sdim      const TargetRegisterClass *IndirectStoreRegClass =
100249259Sdim                   TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
101249259Sdim
102249259Sdim      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
103249259Sdim        // Direct register access.
104249259Sdim        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
105249259Sdim
106249259Sdim        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
107249259Sdim                .addOperand(MI.getOperand(0));
108249259Sdim
109249259Sdim        RegisterAddressMap[DstReg] = Address;
110249259Sdim        LiveAddressRegisterMap[Address] = DstReg;
111249259Sdim      } else {
112249259Sdim        // Indirect register access.
113249259Sdim        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
114249259Sdim                                           MI.getOperand(0).getReg(), // Value
115249259Sdim                                           Address,
116249259Sdim                                           MI.getOperand(1).getReg()); // Offset
117249259Sdim        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
118249259Sdim          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
119249259Sdim          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
120249259Sdim          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
121249259Sdim          RegisterAddressMap[DstReg] = Addr;
122249259Sdim          LiveAddressRegisterMap[Addr] = DstReg;
123249259Sdim        }
124249259Sdim      }
125249259Sdim      MI.eraseFromParent();
126249259Sdim    }
127249259Sdim
128249259Sdim    // Update the live-ins of the succesor blocks
129249259Sdim    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
130249259Sdim                                          SuccEnd = MBB.succ_end();
131249259Sdim                                          SuccEnd != Succ; ++Succ) {
132249259Sdim      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
133249259Sdim      for (Key = LiveAddressRegisterMap.begin(),
134249259Sdim           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
135249259Sdim        (*Succ)->addLiveIn(Key->second);
136249259Sdim      }
137249259Sdim    }
138249259Sdim  }
139249259Sdim
140249259Sdim  // Second pass - Lower the RegisterLoad instructions
141249259Sdim  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
142249259Sdim                                                      BB != BB_E; ++BB) {
143249259Sdim    // Key is the address and the value is the register
144249259Sdim    std::map<unsigned, unsigned> LiveAddressRegisterMap;
145249259Sdim    MachineBasicBlock &MBB = *BB;
146249259Sdim
147249259Sdim    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
148249259Sdim    while (LI != MBB.livein_end()) {
149249259Sdim      std::vector<unsigned> PhiRegisters;
150249259Sdim
151249259Sdim      // Make sure this live in is used for indirect addressing
152249259Sdim      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
153249259Sdim        ++LI;
154249259Sdim        continue;
155249259Sdim      }
156249259Sdim
157249259Sdim      unsigned Address = RegisterAddressMap[*LI];
158249259Sdim      LiveAddressRegisterMap[Address] = *LI;
159249259Sdim      PhiRegisters.push_back(*LI);
160249259Sdim
161249259Sdim      // Check if there are other live in registers which map to the same
162249259Sdim      // indirect address.
163249259Sdim      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
164249259Sdim                                              LE = MBB.livein_end();
165249259Sdim                                              LJ != LE; ++LJ) {
166249259Sdim        unsigned Reg = *LJ;
167249259Sdim        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
168249259Sdim          continue;
169249259Sdim        }
170249259Sdim
171249259Sdim        if (RegisterAddressMap[Reg] == Address) {
172249259Sdim          PhiRegisters.push_back(Reg);
173249259Sdim        }
174249259Sdim      }
175249259Sdim
176249259Sdim      if (PhiRegisters.size() == 1) {
177249259Sdim        // We don't need to insert a Phi instruction, so we can just add the
178249259Sdim        // registers to the live list for the block.
179249259Sdim        LiveAddressRegisterMap[Address] = *LI;
180249259Sdim        MBB.removeLiveIn(*LI);
181249259Sdim      } else {
182249259Sdim        // We need to insert a PHI, because we have the same address being
183249259Sdim        // written in multiple predecessor blocks.
184249259Sdim        const TargetRegisterClass *PhiDstClass =
185249259Sdim                   TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
186249259Sdim        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
187249259Sdim        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
188249259Sdim                                          MBB.findDebugLoc(MBB.begin()),
189249259Sdim                                          TII->get(AMDGPU::PHI), PhiDstReg);
190249259Sdim
191249259Sdim        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
192249259Sdim                                                   RE = PhiRegisters.end();
193249259Sdim                                                   RI != RE; ++RI) {
194249259Sdim          unsigned Reg = *RI;
195249259Sdim          MachineInstr *DefInst = MRI.getVRegDef(Reg);
196249259Sdim          assert(DefInst);
197249259Sdim          MachineBasicBlock *RegBlock = DefInst->getParent();
198249259Sdim          Phi.addReg(Reg);
199249259Sdim          Phi.addMBB(RegBlock);
200249259Sdim          MBB.removeLiveIn(Reg);
201249259Sdim        }
202249259Sdim        RegisterAddressMap[PhiDstReg] = Address;
203249259Sdim        LiveAddressRegisterMap[Address] = PhiDstReg;
204249259Sdim      }
205249259Sdim      LI = MBB.livein_begin();
206249259Sdim    }
207249259Sdim
208249259Sdim    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
209249259Sdim                               I != MBB.end(); I = Next) {
210249259Sdim      Next = llvm::next(I);
211249259Sdim      MachineInstr &MI = *I;
212249259Sdim
213249259Sdim      if (!TII->isRegisterLoad(MI)) {
214249259Sdim        if (MI.getOpcode() == AMDGPU::PHI) {
215249259Sdim          continue;
216249259Sdim        }
217249259Sdim        // Check for indirect register defs
218249259Sdim        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
219249259Sdim                                 OpIdx < NumOperands; ++OpIdx) {
220249259Sdim          MachineOperand &MO = MI.getOperand(OpIdx);
221249259Sdim          if (MO.isReg() && MO.isDef() &&
222249259Sdim              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
223249259Sdim            unsigned Reg = MO.getReg();
224249259Sdim            unsigned LiveAddress = RegisterAddressMap[Reg];
225249259Sdim            // Chain the live-ins
226249259Sdim            if (LiveAddressRegisterMap.find(LiveAddress) !=
227249259Sdim                                                     RegisterAddressMap.end()) {
228249259Sdim              MI.addOperand(MachineOperand::CreateReg(
229249259Sdim                                  LiveAddressRegisterMap[LiveAddress],
230249259Sdim                                  false, // isDef
231249259Sdim                                  true,  // isImp
232249259Sdim                                  true));  // isKill
233249259Sdim            }
234249259Sdim            LiveAddressRegisterMap[LiveAddress] = Reg;
235249259Sdim          }
236249259Sdim        }
237249259Sdim        continue;
238249259Sdim      }
239249259Sdim
240249259Sdim      const TargetRegisterClass *SuperIndirectRegClass =
241249259Sdim                                                TII->getSuperIndirectRegClass();
242249259Sdim      const TargetRegisterClass *IndirectLoadRegClass =
243249259Sdim                                             TII->getIndirectAddrLoadRegClass();
244249259Sdim      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
245249259Sdim
246249259Sdim      unsigned RegIndex = MI.getOperand(2).getImm();
247249259Sdim      unsigned Channel = MI.getOperand(3).getImm();
248249259Sdim      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
249249259Sdim
250249259Sdim      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
251249259Sdim        // Direct register access
252249259Sdim        unsigned Reg = LiveAddressRegisterMap[Address];
253249259Sdim        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
254249259Sdim
255249259Sdim        if (regHasExplicitDef(MRI, Reg)) {
256249259Sdim          // If the register we are reading from has an explicit def, then that
257249259Sdim          // means it was written via a direct register access (i.e. COPY
258249259Sdim          // or other instruction that doesn't use indirect addressing).  In
259249259Sdim          // this case we know where the value has been stored, so we can just
260249259Sdim          // issue a copy.
261249259Sdim          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
262249259Sdim                  MI.getOperand(0).getReg())
263249259Sdim                  .addReg(Reg);
264249259Sdim        } else {
265249259Sdim          // If the register we are reading has an implicit def, then that
266249259Sdim          // means it was written by an indirect register access (i.e. An
267249259Sdim          // instruction that uses indirect addressing.
268249259Sdim          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
269249259Sdim                   MI.getOperand(0).getReg())
270249259Sdim                   .addReg(AddrReg)
271249259Sdim                   .addReg(Reg, RegState::Implicit);
272249259Sdim        }
273249259Sdim      } else {
274249259Sdim        // Indirect register access
275249259Sdim
276249259Sdim        // Note on REQ_SEQUENCE instructons: You can't actually use the register
277249259Sdim        // it defines unless  you have an instruction that takes the defined
278249259Sdim        // register class as an operand.
279249259Sdim
280249259Sdim        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
281249259Sdim                                               TII->get(AMDGPU::REG_SEQUENCE),
282249259Sdim                                               IndirectReg);
283249259Sdim        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
284249259Sdim          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
285249259Sdim          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
286249259Sdim            continue;
287249259Sdim          }
288249259Sdim          unsigned Reg = LiveAddressRegisterMap[Addr];
289249259Sdim
290249259Sdim          // We only need to use REG_SEQUENCE for explicit defs, since the
291249259Sdim          // register coalescer won't do anything with the implicit defs.
292249259Sdim          if (!regHasExplicitDef(MRI, Reg)) {
293249259Sdim            continue;
294249259Sdim          }
295249259Sdim
296249259Sdim          // Insert a REQ_SEQUENCE instruction to force the register allocator
297249259Sdim          // to allocate the virtual register to the correct physical register.
298249259Sdim          Sequence.addReg(LiveAddressRegisterMap[Addr]);
299249259Sdim          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
300249259Sdim        }
301249259Sdim        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
302249259Sdim                                           MI.getOperand(0).getReg(), // Value
303249259Sdim                                           Address,
304249259Sdim                                           MI.getOperand(1).getReg()); // Offset
305249259Sdim
306249259Sdim
307249259Sdim
308249259Sdim        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
309249259Sdim        Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
310249259Sdim
311249259Sdim      }
312249259Sdim      MI.eraseFromParent();
313249259Sdim    }
314249259Sdim  }
315249259Sdim  return false;
316249259Sdim}
317249259Sdim
318249259Sdimbool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
319249259Sdim                                                  unsigned Reg) const {
320249259Sdim  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
321249259Sdim
322249259Sdim  if (!DefInstr) {
323249259Sdim    return false;
324249259Sdim  }
325249259Sdim
326249259Sdim  if (DefInstr->getOpcode() == AMDGPU::PHI) {
327249259Sdim    bool Explicit = false;
328249259Sdim    for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
329249259Sdim                                          E = DefInstr->operands_end();
330249259Sdim                                          I != E; ++I) {
331249259Sdim      const MachineOperand &MO = *I;
332249259Sdim      if (!MO.isReg() || MO.isDef()) {
333249259Sdim        continue;
334249259Sdim      }
335249259Sdim
336249259Sdim      Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
337249259Sdim    }
338249259Sdim    return Explicit;
339249259Sdim  }
340249259Sdim
341249259Sdim  return DefInstr->getOperand(0).isReg() &&
342249259Sdim         DefInstr->getOperand(0).getReg() == Reg;
343249259Sdim}
344