R600ControlFlowFinalizer.cpp revision 360784
1//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass turns all control flow pseudo instructions into native ones,
11/// computing their addresses on the fly; it also sets STACK_SIZE info.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "AMDGPUSubtarget.h"
17#include "R600Defines.h"
18#include "R600InstrInfo.h"
19#include "R600MachineFunctionInfo.h"
20#include "R600RegisterInfo.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30#include "llvm/CodeGen/MachineOperand.h"
31#include "llvm/IR/CallingConv.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/IR/Function.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/Compiler.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/MathExtras.h"
38#include "llvm/Support/raw_ostream.h"
39#include <algorithm>
40#include <cassert>
41#include <cstdint>
42#include <set>
43#include <utility>
44#include <vector>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "r600cf"
49
50namespace {
51
52struct CFStack {
53  enum StackItem {
54    ENTRY = 0,
55    SUB_ENTRY = 1,
56    FIRST_NON_WQM_PUSH = 2,
57    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
58  };
59
60  const R600Subtarget *ST;
61  std::vector<StackItem> BranchStack;
62  std::vector<StackItem> LoopStack;
63  unsigned MaxStackSize;
64  unsigned CurrentEntries = 0;
65  unsigned CurrentSubEntries = 0;
66
67  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
68      // We need to reserve a stack entry for CALL_FS in vertex shaders.
69      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
70
71  unsigned getLoopDepth();
72  bool branchStackContains(CFStack::StackItem);
73  bool requiresWorkAroundForInst(unsigned Opcode);
74  unsigned getSubEntrySize(CFStack::StackItem Item);
75  void updateMaxStackSize();
76  void pushBranch(unsigned Opcode, bool isWQM = false);
77  void pushLoop();
78  void popBranch();
79  void popLoop();
80};
81
82unsigned CFStack::getLoopDepth() {
83  return LoopStack.size();
84}
85
86bool CFStack::branchStackContains(CFStack::StackItem Item) {
87  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
88       E = BranchStack.end(); I != E; ++I) {
89    if (*I == Item)
90      return true;
91  }
92  return false;
93}
94
95bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
96  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
97      getLoopDepth() > 1)
98    return true;
99
100  if (!ST->hasCFAluBug())
101    return false;
102
103  switch(Opcode) {
104  default: return false;
105  case R600::CF_ALU_PUSH_BEFORE:
106  case R600::CF_ALU_ELSE_AFTER:
107  case R600::CF_ALU_BREAK:
108  case R600::CF_ALU_CONTINUE:
109    if (CurrentSubEntries == 0)
110      return false;
111    if (ST->getWavefrontSize() == 64) {
112      // We are being conservative here.  We only require this work-around if
113      // CurrentSubEntries > 3 &&
114      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
115      //
116      // We have to be conservative, because we don't know for certain that
117      // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
118      // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
119      // resources without any problems.
120      return CurrentSubEntries > 3;
121    } else {
122      assert(ST->getWavefrontSize() == 32);
123      // We are being conservative here.  We only require the work-around if
124      // CurrentSubEntries > 7 &&
125      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
126      // See the comment on the wavefront size == 64 case for why we are
127      // being conservative.
128      return CurrentSubEntries > 7;
129    }
130  }
131}
132
133unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
134  switch(Item) {
135  default:
136    return 0;
137  case CFStack::FIRST_NON_WQM_PUSH:
138  assert(!ST->hasCaymanISA());
139  if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
140    // +1 For the push operation.
141    // +2 Extra space required.
142    return 3;
143  } else {
144    // Some documentation says that this is not necessary on Evergreen,
145    // but experimentation has show that we need to allocate 1 extra
146    // sub-entry for the first non-WQM push.
147    // +1 For the push operation.
148    // +1 Extra space required.
149    return 2;
150  }
151  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
152    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
153    // +1 For the push operation.
154    // +1 Extra space required.
155    return 2;
156  case CFStack::SUB_ENTRY:
157    return 1;
158  }
159}
160
161void CFStack::updateMaxStackSize() {
162  unsigned CurrentStackSize =
163      CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
164  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
165}
166
167void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
168  CFStack::StackItem Item = CFStack::ENTRY;
169  switch(Opcode) {
170  case R600::CF_PUSH_EG:
171  case R600::CF_ALU_PUSH_BEFORE:
172    if (!isWQM) {
173      if (!ST->hasCaymanISA() &&
174          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
175        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
176                                             // See comment in
177                                             // CFStack::getSubEntrySize()
178      else if (CurrentEntries > 0 &&
179               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
180               !ST->hasCaymanISA() &&
181               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
182        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
183      else
184        Item = CFStack::SUB_ENTRY;
185    } else
186      Item = CFStack::ENTRY;
187    break;
188  }
189  BranchStack.push_back(Item);
190  if (Item == CFStack::ENTRY)
191    CurrentEntries++;
192  else
193    CurrentSubEntries += getSubEntrySize(Item);
194  updateMaxStackSize();
195}
196
197void CFStack::pushLoop() {
198  LoopStack.push_back(CFStack::ENTRY);
199  CurrentEntries++;
200  updateMaxStackSize();
201}
202
203void CFStack::popBranch() {
204  CFStack::StackItem Top = BranchStack.back();
205  if (Top == CFStack::ENTRY)
206    CurrentEntries--;
207  else
208    CurrentSubEntries-= getSubEntrySize(Top);
209  BranchStack.pop_back();
210}
211
212void CFStack::popLoop() {
213  CurrentEntries--;
214  LoopStack.pop_back();
215}
216
217class R600ControlFlowFinalizer : public MachineFunctionPass {
218private:
219  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
220
221  enum ControlFlowInstruction {
222    CF_TC,
223    CF_VC,
224    CF_CALL_FS,
225    CF_WHILE_LOOP,
226    CF_END_LOOP,
227    CF_LOOP_BREAK,
228    CF_LOOP_CONTINUE,
229    CF_JUMP,
230    CF_ELSE,
231    CF_POP,
232    CF_END
233  };
234
235  const R600InstrInfo *TII = nullptr;
236  const R600RegisterInfo *TRI = nullptr;
237  unsigned MaxFetchInst;
238  const R600Subtarget *ST = nullptr;
239
240  bool IsTrivialInst(MachineInstr &MI) const {
241    switch (MI.getOpcode()) {
242    case R600::KILL:
243    case R600::RETURN:
244      return true;
245    default:
246      return false;
247    }
248  }
249
250  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
251    unsigned Opcode = 0;
252    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
253    switch (CFI) {
254    case CF_TC:
255      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
256      break;
257    case CF_VC:
258      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
259      break;
260    case CF_CALL_FS:
261      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
262      break;
263    case CF_WHILE_LOOP:
264      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
265      break;
266    case CF_END_LOOP:
267      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
268      break;
269    case CF_LOOP_BREAK:
270      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
271      break;
272    case CF_LOOP_CONTINUE:
273      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
274      break;
275    case CF_JUMP:
276      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
277      break;
278    case CF_ELSE:
279      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
280      break;
281    case CF_POP:
282      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
283      break;
284    case CF_END:
285      if (ST->hasCaymanISA()) {
286        Opcode = R600::CF_END_CM;
287        break;
288      }
289      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
290      break;
291    }
292    assert (Opcode && "No opcode selected");
293    return TII->get(Opcode);
294  }
295
296  bool isCompatibleWithClause(const MachineInstr &MI,
297                              std::set<unsigned> &DstRegs) const {
298    unsigned DstMI, SrcMI;
299    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
300                                          E = MI.operands_end();
301         I != E; ++I) {
302      const MachineOperand &MO = *I;
303      if (!MO.isReg())
304        continue;
305      if (MO.isDef()) {
306        Register Reg = MO.getReg();
307        if (R600::R600_Reg128RegClass.contains(Reg))
308          DstMI = Reg;
309        else
310          DstMI = TRI->getMatchingSuperReg(Reg,
311              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
312              &R600::R600_Reg128RegClass);
313      }
314      if (MO.isUse()) {
315        Register Reg = MO.getReg();
316        if (R600::R600_Reg128RegClass.contains(Reg))
317          SrcMI = Reg;
318        else
319          SrcMI = TRI->getMatchingSuperReg(Reg,
320              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
321              &R600::R600_Reg128RegClass);
322      }
323    }
324    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
325      DstRegs.insert(DstMI);
326      return true;
327    } else
328      return false;
329  }
330
331  ClauseFile
332  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
333      const {
334    MachineBasicBlock::iterator ClauseHead = I;
335    std::vector<MachineInstr *> ClauseContent;
336    unsigned AluInstCount = 0;
337    bool IsTex = TII->usesTextureCache(*ClauseHead);
338    std::set<unsigned> DstRegs;
339    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
340      if (IsTrivialInst(*I))
341        continue;
342      if (AluInstCount >= MaxFetchInst)
343        break;
344      if ((IsTex && !TII->usesTextureCache(*I)) ||
345          (!IsTex && !TII->usesVertexCache(*I)))
346        break;
347      if (!isCompatibleWithClause(*I, DstRegs))
348        break;
349      AluInstCount ++;
350      ClauseContent.push_back(&*I);
351    }
352    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
353        getHWInstrDesc(IsTex?CF_TC:CF_VC))
354        .addImm(0) // ADDR
355        .addImm(AluInstCount - 1); // COUNT
356    return ClauseFile(MIb, std::move(ClauseContent));
357  }
358
359  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
360    static const unsigned LiteralRegs[] = {
361      R600::ALU_LITERAL_X,
362      R600::ALU_LITERAL_Y,
363      R600::ALU_LITERAL_Z,
364      R600::ALU_LITERAL_W
365    };
366    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
367        TII->getSrcs(MI);
368    for (const auto &Src:Srcs) {
369      if (Src.first->getReg() != R600::ALU_LITERAL_X)
370        continue;
371      int64_t Imm = Src.second;
372      std::vector<MachineOperand *>::iterator It =
373          llvm::find_if(Lits, [&](MachineOperand *val) {
374            return val->isImm() && (val->getImm() == Imm);
375          });
376
377      // Get corresponding Operand
378      MachineOperand &Operand = MI.getOperand(
379          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
380
381      if (It != Lits.end()) {
382        // Reuse existing literal reg
383        unsigned Index = It - Lits.begin();
384        Src.first->setReg(LiteralRegs[Index]);
385      } else {
386        // Allocate new literal reg
387        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
388        Src.first->setReg(LiteralRegs[Lits.size()]);
389        Lits.push_back(&Operand);
390      }
391    }
392  }
393
394  MachineBasicBlock::iterator insertLiterals(
395      MachineBasicBlock::iterator InsertPos,
396      const std::vector<unsigned> &Literals) const {
397    MachineBasicBlock *MBB = InsertPos->getParent();
398    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
399      unsigned LiteralPair0 = Literals[i];
400      unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
401      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
402          TII->get(R600::LITERALS))
403          .addImm(LiteralPair0)
404          .addImm(LiteralPair1);
405    }
406    return InsertPos;
407  }
408
409  ClauseFile
410  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
411      const {
412    MachineInstr &ClauseHead = *I;
413    std::vector<MachineInstr *> ClauseContent;
414    I++;
415    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
416      if (IsTrivialInst(*I)) {
417        ++I;
418        continue;
419      }
420      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
421        break;
422      std::vector<MachineOperand *>Literals;
423      if (I->isBundle()) {
424        MachineInstr &DeleteMI = *I;
425        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
426        while (++BI != E && BI->isBundledWithPred()) {
427          BI->unbundleFromPred();
428          for (MachineOperand &MO : BI->operands()) {
429            if (MO.isReg() && MO.isInternalRead())
430              MO.setIsInternalRead(false);
431          }
432          getLiteral(*BI, Literals);
433          ClauseContent.push_back(&*BI);
434        }
435        I = BI;
436        DeleteMI.eraseFromParent();
437      } else {
438        getLiteral(*I, Literals);
439        ClauseContent.push_back(&*I);
440        I++;
441      }
442      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
443        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
444            TII->get(R600::LITERALS));
445        if (Literals[i]->isImm()) {
446            MILit.addImm(Literals[i]->getImm());
447        } else {
448            MILit.addGlobalAddress(Literals[i]->getGlobal(),
449                                   Literals[i]->getOffset());
450        }
451        if (i + 1 < e) {
452          if (Literals[i + 1]->isImm()) {
453            MILit.addImm(Literals[i + 1]->getImm());
454          } else {
455            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
456                                   Literals[i + 1]->getOffset());
457          }
458        } else
459          MILit.addImm(0);
460        ClauseContent.push_back(MILit);
461      }
462    }
463    assert(ClauseContent.size() < 128 && "ALU clause is too big");
464    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
465    return ClauseFile(&ClauseHead, std::move(ClauseContent));
466  }
467
468  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
469                       const DebugLoc &DL, ClauseFile &Clause,
470                       unsigned &CfCount) {
471    CounterPropagateAddr(*Clause.first, CfCount);
472    MachineBasicBlock *BB = Clause.first->getParent();
473    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
474    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
475      BB->splice(InsertPos, BB, Clause.second[i]);
476    }
477    CfCount += 2 * Clause.second.size();
478  }
479
480  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
481                     ClauseFile &Clause, unsigned &CfCount) {
482    Clause.first->getOperand(0).setImm(0);
483    CounterPropagateAddr(*Clause.first, CfCount);
484    MachineBasicBlock *BB = Clause.first->getParent();
485    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
486    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
487      BB->splice(InsertPos, BB, Clause.second[i]);
488    }
489    CfCount += Clause.second.size();
490  }
491
492  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
493    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
494  }
495  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
496                            unsigned Addr) const {
497    for (MachineInstr *MI : MIs) {
498      CounterPropagateAddr(*MI, Addr);
499    }
500  }
501
502public:
503  static char ID;
504
505  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
506
507  bool runOnMachineFunction(MachineFunction &MF) override {
508    ST = &MF.getSubtarget<R600Subtarget>();
509    MaxFetchInst = ST->getTexVTXClauseSize();
510    TII = ST->getInstrInfo();
511    TRI = ST->getRegisterInfo();
512
513    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
514
515    CFStack CFStack(ST, MF.getFunction().getCallingConv());
516    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
517        ++MB) {
518      MachineBasicBlock &MBB = *MB;
519      unsigned CfCount = 0;
520      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
521      std::vector<MachineInstr * > IfThenElseStack;
522      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
523        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
524            getHWInstrDesc(CF_CALL_FS));
525        CfCount++;
526      }
527      std::vector<ClauseFile> FetchClauses, AluClauses;
528      std::vector<MachineInstr *> LastAlu(1);
529      std::vector<MachineInstr *> ToPopAfter;
530
531      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
532          I != E;) {
533        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
534          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
535          FetchClauses.push_back(MakeFetchClause(MBB, I));
536          CfCount++;
537          LastAlu.back() = nullptr;
538          continue;
539        }
540
541        MachineBasicBlock::iterator MI = I;
542        if (MI->getOpcode() != R600::ENDIF)
543          LastAlu.back() = nullptr;
544        if (MI->getOpcode() == R600::CF_ALU)
545          LastAlu.back() = &*MI;
546        I++;
547        bool RequiresWorkAround =
548            CFStack.requiresWorkAroundForInst(MI->getOpcode());
549        switch (MI->getOpcode()) {
550        case R600::CF_ALU_PUSH_BEFORE:
551          if (RequiresWorkAround) {
552            LLVM_DEBUG(dbgs()
553                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
554            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
555                .addImm(CfCount + 1)
556                .addImm(1);
557            MI->setDesc(TII->get(R600::CF_ALU));
558            CfCount++;
559            CFStack.pushBranch(R600::CF_PUSH_EG);
560          } else
561            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
562          LLVM_FALLTHROUGH;
563        case R600::CF_ALU:
564          I = MI;
565          AluClauses.push_back(MakeALUClause(MBB, I));
566          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
567          CfCount++;
568          break;
569        case R600::WHILELOOP: {
570          CFStack.pushLoop();
571          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
572              getHWInstrDesc(CF_WHILE_LOOP))
573              .addImm(1);
574          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
575              std::set<MachineInstr *>());
576          Pair.second.insert(MIb);
577          LoopStack.push_back(std::move(Pair));
578          MI->eraseFromParent();
579          CfCount++;
580          break;
581        }
582        case R600::ENDLOOP: {
583          CFStack.popLoop();
584          std::pair<unsigned, std::set<MachineInstr *>> Pair =
585              std::move(LoopStack.back());
586          LoopStack.pop_back();
587          CounterPropagateAddr(Pair.second, CfCount);
588          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
589              .addImm(Pair.first + 1);
590          MI->eraseFromParent();
591          CfCount++;
592          break;
593        }
594        case R600::IF_PREDICATE_SET: {
595          LastAlu.push_back(nullptr);
596          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
597              getHWInstrDesc(CF_JUMP))
598              .addImm(0)
599              .addImm(0);
600          IfThenElseStack.push_back(MIb);
601          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
602          MI->eraseFromParent();
603          CfCount++;
604          break;
605        }
606        case R600::ELSE: {
607          MachineInstr * JumpInst = IfThenElseStack.back();
608          IfThenElseStack.pop_back();
609          CounterPropagateAddr(*JumpInst, CfCount);
610          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
611              getHWInstrDesc(CF_ELSE))
612              .addImm(0)
613              .addImm(0);
614          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
615          IfThenElseStack.push_back(MIb);
616          MI->eraseFromParent();
617          CfCount++;
618          break;
619        }
620        case R600::ENDIF: {
621          CFStack.popBranch();
622          if (LastAlu.back()) {
623            ToPopAfter.push_back(LastAlu.back());
624          } else {
625            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
626                getHWInstrDesc(CF_POP))
627                .addImm(CfCount + 1)
628                .addImm(1);
629            (void)MIb;
630            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
631            CfCount++;
632          }
633
634          MachineInstr *IfOrElseInst = IfThenElseStack.back();
635          IfThenElseStack.pop_back();
636          CounterPropagateAddr(*IfOrElseInst, CfCount);
637          IfOrElseInst->getOperand(1).setImm(1);
638          LastAlu.pop_back();
639          MI->eraseFromParent();
640          break;
641        }
642        case R600::BREAK: {
643          CfCount ++;
644          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
645              getHWInstrDesc(CF_LOOP_BREAK))
646              .addImm(0);
647          LoopStack.back().second.insert(MIb);
648          MI->eraseFromParent();
649          break;
650        }
651        case R600::CONTINUE: {
652          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
653              getHWInstrDesc(CF_LOOP_CONTINUE))
654              .addImm(0);
655          LoopStack.back().second.insert(MIb);
656          MI->eraseFromParent();
657          CfCount++;
658          break;
659        }
660        case R600::RETURN: {
661          DebugLoc DL = MBB.findDebugLoc(MI);
662          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
663          CfCount++;
664          if (CfCount % 2) {
665            BuildMI(MBB, I, DL, TII->get(R600::PAD));
666            CfCount++;
667          }
668          MI->eraseFromParent();
669          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
670            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
671          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
672            EmitALUClause(I, DL, AluClauses[i], CfCount);
673          break;
674        }
675        default:
676          if (TII->isExport(MI->getOpcode())) {
677            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
678            CfCount++;
679          }
680          break;
681        }
682      }
683      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
684        MachineInstr *Alu = ToPopAfter[i];
685        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
686            TII->get(R600::CF_ALU_POP_AFTER))
687            .addImm(Alu->getOperand(0).getImm())
688            .addImm(Alu->getOperand(1).getImm())
689            .addImm(Alu->getOperand(2).getImm())
690            .addImm(Alu->getOperand(3).getImm())
691            .addImm(Alu->getOperand(4).getImm())
692            .addImm(Alu->getOperand(5).getImm())
693            .addImm(Alu->getOperand(6).getImm())
694            .addImm(Alu->getOperand(7).getImm())
695            .addImm(Alu->getOperand(8).getImm());
696        Alu->eraseFromParent();
697      }
698      MFI->CFStackSize = CFStack.MaxStackSize;
699    }
700
701    return false;
702  }
703
704  StringRef getPassName() const override {
705    return "R600 Control Flow Finalizer Pass";
706  }
707};
708
709} // end anonymous namespace
710
711INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
712                     "R600 Control Flow Finalizer", false, false)
713INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
714                    "R600 Control Flow Finalizer", false, false)
715
716char R600ControlFlowFinalizer::ID = 0;
717
718char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
719
720FunctionPass *llvm::createR600ControlFlowFinalizer() {
721  return new R600ControlFlowFinalizer();
722}
723