AMDILCFGStructurizer.cpp revision 360784
1//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "AMDGPU.h"
10#include "AMDGPUSubtarget.h"
11#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12#include "R600InstrInfo.h"
13#include "R600RegisterInfo.h"
14#include "llvm/ADT/DepthFirstIterator.h"
15#include "llvm/ADT/SCCIterator.h"
16#include "llvm/ADT/SmallPtrSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/CodeGen/MachineBasicBlock.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineJumpTableInfo.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachinePostDominators.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/InitializePasses.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/MachineValueType.h"
38#include "llvm/Support/raw_ostream.h"
39#include <cassert>
40#include <cstddef>
41#include <deque>
42#include <iterator>
43#include <map>
44#include <utility>
45#include <vector>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "structcfg"
50
51#define DEFAULT_VEC_SLOTS 8
52
53// TODO: move-begin.
54
55//===----------------------------------------------------------------------===//
56//
57// Statistics for CFGStructurizer.
58//
59//===----------------------------------------------------------------------===//
60
61STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
62    "matched");
63STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
64    "matched");
65STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
66STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
67
68namespace llvm {
69
70void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
71
72} // end namespace llvm
73
74namespace {
75
76//===----------------------------------------------------------------------===//
77//
78// Miscellaneous utility for CFGStructurizer.
79//
80//===----------------------------------------------------------------------===//
81
82#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
83
84#define SHOWNEWBLK(b, msg)                                                     \
85  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << " size " << b->size(); \
86             dbgs() << "\n";);
87
88#define SHOWBLK_DETAIL(b, msg)                                                 \
89  LLVM_DEBUG(if (b) {                                                          \
90    dbgs() << msg << "BB" << b->getNumber() << " size " << b->size();          \
91    b->print(dbgs());                                                          \
92    dbgs() << "\n";                                                            \
93  });
94
95#define INVALIDSCCNUM -1
96
97//===----------------------------------------------------------------------===//
98//
99// supporting data structure for CFGStructurizer
100//
101//===----------------------------------------------------------------------===//
102
103class BlockInformation {
104public:
105  bool IsRetired = false;
106  int SccNum = INVALIDSCCNUM;
107
108  BlockInformation() = default;
109};
110
111//===----------------------------------------------------------------------===//
112//
113// CFGStructurizer
114//
115//===----------------------------------------------------------------------===//
116
117class AMDGPUCFGStructurizer : public MachineFunctionPass {
118public:
119  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
120  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
121  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
122
123  enum PathToKind {
124    Not_SinglePath = 0,
125    SinglePath_InPath = 1,
126    SinglePath_NotInPath = 2
127  };
128
129  static char ID;
130
131  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
132    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
133  }
134
135  StringRef getPassName() const override {
136    return "AMDGPU Control Flow Graph structurizer Pass";
137  }
138
139  void getAnalysisUsage(AnalysisUsage &AU) const override {
140    AU.addRequired<MachineDominatorTree>();
141    AU.addRequired<MachinePostDominatorTree>();
142    AU.addRequired<MachineLoopInfo>();
143    MachineFunctionPass::getAnalysisUsage(AU);
144  }
145
146  /// Perform the CFG structurization
147  bool run();
148
149  /// Perform the CFG preparation
150  /// This step will remove every unconditional/dead jump instruction and make
151  /// sure all loops have an exit block.
152  bool prepare();
153
154  bool runOnMachineFunction(MachineFunction &MF) override {
155    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
156    TRI = &TII->getRegisterInfo();
157    LLVM_DEBUG(MF.dump(););
158    OrderedBlks.clear();
159    Visited.clear();
160    FuncRep = &MF;
161    MLI = &getAnalysis<MachineLoopInfo>();
162    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
163    MDT = &getAnalysis<MachineDominatorTree>();
164    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
165    PDT = &getAnalysis<MachinePostDominatorTree>();
166    LLVM_DEBUG(PDT->print(dbgs()););
167    prepare();
168    run();
169    LLVM_DEBUG(MF.dump(););
170    return true;
171  }
172
173protected:
174  MachineDominatorTree *MDT;
175  MachinePostDominatorTree *PDT;
176  MachineLoopInfo *MLI;
177  const R600InstrInfo *TII = nullptr;
178  const R600RegisterInfo *TRI = nullptr;
179
180  // PRINT FUNCTIONS
181  /// Print the ordered Blocks.
182  void printOrderedBlocks() const {
183    size_t i = 0;
184    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
185        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
186      dbgs() << "BB" << (*iterBlk)->getNumber();
187      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
188      if (i != 0 && i % 10 == 0) {
189        dbgs() << "\n";
190      } else {
191        dbgs() << " ";
192      }
193    }
194  }
195
196  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
197    for (MachineLoop::iterator iter = LoopInfo.begin(),
198         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
199      (*iter)->print(dbgs(), 0);
200    }
201  }
202
203  // UTILITY FUNCTIONS
204  int getSCCNum(MachineBasicBlock *MBB) const;
205  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
206  bool hasBackEdge(MachineBasicBlock *MBB) const;
207  bool isRetiredBlock(MachineBasicBlock *MBB) const;
208  bool isActiveLoophead(MachineBasicBlock *MBB) const;
209  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
210      bool AllowSideEntry = true) const;
211  int countActiveBlock(MBBVector::const_iterator It,
212      MBBVector::const_iterator E) const;
213  bool needMigrateBlock(MachineBasicBlock *MBB) const;
214
215  // Utility Functions
216  void reversePredicateSetter(MachineBasicBlock::iterator I,
217                              MachineBasicBlock &MBB);
218  /// Compute the reversed DFS post order of Blocks
219  void orderBlocks(MachineFunction *MF);
220
221  // Function originally from CFGStructTraits
222  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
223                      const DebugLoc &DL = DebugLoc());
224  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
225                                  const DebugLoc &DL = DebugLoc());
226  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
227  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
228                              const DebugLoc &DL);
229  void insertCondBranchBefore(MachineBasicBlock *MBB,
230                              MachineBasicBlock::iterator I, int NewOpcode,
231                              int RegNum, const DebugLoc &DL);
232
233  static int getBranchNzeroOpcode(int OldOpcode);
234  static int getBranchZeroOpcode(int OldOpcode);
235  static int getContinueNzeroOpcode(int OldOpcode);
236  static int getContinueZeroOpcode(int OldOpcode);
237  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
238  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
239  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
240      MachineInstr *MI);
241  static bool isCondBranch(MachineInstr *MI);
242  static bool isUncondBranch(MachineInstr *MI);
243  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
244  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
245
246  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
247  ///
248  /// A BB with a backward edge could have move instructions after the branch
249  /// instruction.  Such move instructions "belong to" the loop backward edge.
250  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
251
252  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
253  static bool isReturnBlock(MachineBasicBlock *MBB);
254  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
255      MachineBasicBlock *SrcMBB);
256  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
257
258  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
259  /// because the AMDGPU branch instruction is not recognized as a terminator.
260  /// Fix this and retire this routine.
261  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
262      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
263
264  static void wrapup(MachineBasicBlock *MBB);
265
266  int patternMatch(MachineBasicBlock *MBB);
267  int patternMatchGroup(MachineBasicBlock *MBB);
268  int serialPatternMatch(MachineBasicBlock *MBB);
269  int ifPatternMatch(MachineBasicBlock *MBB);
270  int loopendPatternMatch();
271  int mergeLoop(MachineLoop *LoopRep);
272
273  /// Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
274  /// the same loop with LoopLandInfo.  Without explicitly keeping track of
275  /// loopContBlks and loopBreakBlks, this is a way to get that information.
276  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
277      MachineBasicBlock *Src2MBB);
278  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
279      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
280  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
281      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
282  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
283      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
284      MachineBasicBlock **LandMBBPtr);
285  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
286      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
287      MachineBasicBlock *LandMBB, bool Detail = false);
288  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
289      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
290  void mergeSerialBlock(MachineBasicBlock *DstMBB,
291      MachineBasicBlock *SrcMBB);
292
293  void mergeIfthenelseBlock(MachineInstr *BranchMI,
294      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
295      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
296  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
297      MachineBasicBlock *LandMBB);
298  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
299      MachineBasicBlock *LandMBB);
300  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
301      MachineBasicBlock *ContMBB);
302
303  /// normalizeInfiniteLoopExit changes
304  ///   B1:
305  ///        uncond_br LoopHeader
306  ///
307  /// to
308  ///   B1:
309  ///        cond_br 1 LoopHeader dummyExit
310  /// and returns the newly added dummy exit block.
311  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
312  void removeUnconditionalBranch(MachineBasicBlock *MBB);
313
314  /// Remove duplicate branch instructions in a block.
315  /// For instance
316  /// B0:
317  ///    cond_br X B1 B2
318  ///    cond_br X B1 B2
319  /// is transformed to
320  /// B0:
321  ///    cond_br X B1 B2
322  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
323
324  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
325  void removeSuccessor(MachineBasicBlock *MBB);
326  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
327      MachineBasicBlock *PredMBB);
328  void migrateInstruction(MachineBasicBlock *SrcMBB,
329      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
330  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
331  void retireBlock(MachineBasicBlock *MBB);
332
333private:
334  MBBInfoMap BlockInfoMap;
335  LoopLandInfoMap LLInfoMap;
336  std::map<MachineLoop *, bool> Visited;
337  MachineFunction *FuncRep;
338  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
339};
340
341} // end anonymous namespace
342
343char AMDGPUCFGStructurizer::ID = 0;
344
345int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
346  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
347  if (It == BlockInfoMap.end())
348    return INVALIDSCCNUM;
349  return (*It).second->SccNum;
350}
351
352MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
353    const {
354  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
355  if (It == LLInfoMap.end())
356    return nullptr;
357  return (*It).second;
358}
359
360bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
361  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
362  if (!LoopRep)
363    return false;
364  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
365  return MBB->isSuccessor(LoopHeader);
366}
367
368bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
369  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
370  if (It == BlockInfoMap.end())
371    return false;
372  return (*It).second->IsRetired;
373}
374
375bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
376  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
377  while (LoopRep && LoopRep->getHeader() == MBB) {
378    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
379    if(!LoopLand)
380      return true;
381    if (!isRetiredBlock(LoopLand))
382      return true;
383    LoopRep = LoopRep->getParentLoop();
384  }
385  return false;
386}
387
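// Follow the chain of single successors starting at SrcMBB.  Returns
// SinglePath_InPath if DstMBB is reached, SinglePath_NotInPath if a block
// with no successors is reached instead, and Not_SinglePath otherwise
// (including when a side entry is found while AllowSideEntry is false).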
388AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
389    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
390    bool AllowSideEntry) const {
391  assert(DstMBB);
392  if (SrcMBB == DstMBB)
393    return SinglePath_InPath;
394  while (SrcMBB && SrcMBB->succ_size() == 1) {
395    SrcMBB = *SrcMBB->succ_begin();
396    if (SrcMBB == DstMBB)
397      return SinglePath_InPath;
398    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
399      return Not_SinglePath;
400  }
401  if (SrcMBB && SrcMBB->succ_size()==0)
402    return SinglePath_NotInPath;
403  return Not_SinglePath;
404}
405
406int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
407    MBBVector::const_iterator E) const {
408  int Count = 0;
409  while (It != E) {
410    if (!isRetiredBlock(*It))
411      ++Count;
412    ++It;
413  }
414  return Count;
415}
416
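// Heuristic used by improveSimpleJumpintoIf: a block only needs to be
// migrated when it has more than one predecessor, is larger than
// BlockSizeThreshold, and duplicating it for the extra predecessors would
// copy more than CloneInstrThreshold instructions.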
417bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
418  unsigned BlockSizeThreshold = 30;
419  unsigned CloneInstrThreshold = 100;
420  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
421
422  if(!MultiplePreds)
423    return false;
424  unsigned BlkSize = MBB->size();
425  return ((BlkSize > BlockSizeThreshold) &&
426      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
427}
428
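// Walk backwards from I to the R600::PRED_X instruction that sets the
// predicate and invert its comparison (SETE <-> SETNE), reversing the
// condition of the branch that consumes it.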
429void AMDGPUCFGStructurizer::reversePredicateSetter(
430    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
431  assert(I.isValid() && "Expected valid iterator");
432  for (;; --I) {
433    if (I == MBB.end())
434      continue;
435    if (I->getOpcode() == R600::PRED_X) {
436      switch (I->getOperand(2).getImm()) {
437      case R600::PRED_SETE_INT:
438        I->getOperand(2).setImm(R600::PRED_SETNE_INT);
439        return;
440      case R600::PRED_SETNE_INT:
441        I->getOperand(2).setImm(R600::PRED_SETE_INT);
442        return;
443      case R600::PRED_SETE:
444        I->getOperand(2).setImm(R600::PRED_SETNE);
445        return;
446      case R600::PRED_SETNE:
447        I->getOperand(2).setImm(R600::PRED_SETE);
448        return;
449      default:
450        llvm_unreachable("PRED_X Opcode invalid!");
451      }
452    }
453  }
454}
455
456void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
457                                           int NewOpcode, const DebugLoc &DL) {
458  MachineInstr *MI =
459      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
460  MBB->push_back(MI);
461  //assume the instruction doesn't take any reg operand ...
462  SHOWNEWINSTR(MI);
463}
464
465MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
466                                                       int NewOpcode,
467                                                       const DebugLoc &DL) {
468  MachineInstr *MI =
469      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
470  if (MBB->begin() != MBB->end())
471    MBB->insert(MBB->begin(), MI);
472  else
473    MBB->push_back(MI);
474  SHOWNEWINSTR(MI);
475  return MI;
476}
477
478MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
479    MachineBasicBlock::iterator I, int NewOpcode) {
480  MachineInstr *OldMI = &(*I);
481  MachineBasicBlock *MBB = OldMI->getParent();
482  MachineInstr *NewMI =
483      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
484  MBB->insert(I, NewMI);
485  //assume the instruction doesn't take any reg operand ...
486  SHOWNEWINSTR(NewMI);
487  return NewMI;
488}
489
490void AMDGPUCFGStructurizer::insertCondBranchBefore(
491    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
492  MachineInstr *OldMI = &(*I);
493  MachineBasicBlock *MBB = OldMI->getParent();
494  MachineFunction *MF = MBB->getParent();
495  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
496  MBB->insert(I, NewMI);
497  MachineInstrBuilder MIB(*MF, NewMI);
498  MIB.addReg(OldMI->getOperand(1).getReg(), false);
499  SHOWNEWINSTR(NewMI);
500  //erase later oldInstr->eraseFromParent();
501}
502
503void AMDGPUCFGStructurizer::insertCondBranchBefore(
504    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
505    int RegNum, const DebugLoc &DL) {
506  MachineFunction *MF = blk->getParent();
507  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
508  //insert before
509  blk->insert(I, NewInstr);
510  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
511  SHOWNEWINSTR(NewInstr);
512}
513
514int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
515  switch(OldOpcode) {
516  case R600::JUMP_COND:
517  case R600::JUMP: return R600::IF_PREDICATE_SET;
518  case R600::BRANCH_COND_i32:
519  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
520  default: llvm_unreachable("internal error");
521  }
522  return -1;
523}
524
525int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
526  switch(OldOpcode) {
527  case R600::JUMP_COND:
528  case R600::JUMP: return R600::IF_PREDICATE_SET;
529  case R600::BRANCH_COND_i32:
530  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
531  default: llvm_unreachable("internal error");
532  }
533  return -1;
534}
535
536int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
537  switch(OldOpcode) {
538  case R600::JUMP_COND:
539  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
540  default: llvm_unreachable("internal error");
541  }
542  return -1;
543}
544
545int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
546  switch(OldOpcode) {
547  case R600::JUMP_COND:
548  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
549  default: llvm_unreachable("internal error");
550  }
551  return -1;
552}
553
554MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
555  return MI->getOperand(0).getMBB();
556}
557
558void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
559    MachineBasicBlock *MBB) {
560  MI->getOperand(0).setMBB(MBB);
561}
562
563MachineBasicBlock *
564AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
565    MachineInstr *MI) {
566  assert(MBB->succ_size() == 2);
567  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
568  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
569  MachineBasicBlock::succ_iterator Next = It;
570  ++Next;
571  return (*It == TrueBranch) ? *Next : *It;
572}
573
574bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
575  switch (MI->getOpcode()) {
576    case R600::JUMP_COND:
577    case R600::BRANCH_COND_i32:
578    case R600::BRANCH_COND_f32: return true;
579  default:
580    return false;
581  }
582  return false;
583}
584
585bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
586  switch (MI->getOpcode()) {
587  case R600::JUMP:
588  case R600::BRANCH:
589    return true;
590  default:
591    return false;
592  }
593  return false;
594}
595
596DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
597  //get DebugLoc from the last MachineBasicBlock instruction with debug info
598  DebugLoc DL;
599  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
600      ++It) {
601    MachineInstr *instr = &(*It);
602    if (instr->getDebugLoc())
603      DL = instr->getDebugLoc();
604  }
605  return DL;
606}
607
608MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
609    MachineBasicBlock *MBB) {
610  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
611  MachineInstr *MI = &*It;
612  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
613    return MI;
614  return nullptr;
615}
616
617MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
618    MachineBasicBlock *MBB) {
619  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
620      It != E; ++It) {
621    // FIXME: Simplify
622    MachineInstr *MI = &*It;
623    if (MI) {
624      if (isCondBranch(MI) || isUncondBranch(MI))
625        return MI;
626      else if (!TII->isMov(MI->getOpcode()))
627        break;
628    }
629  }
630  return nullptr;
631}
632
633MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
634  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
635  if (It != MBB->rend()) {
636    MachineInstr *instr = &(*It);
637    if (instr->getOpcode() == R600::RETURN)
638      return instr;
639  }
640  return nullptr;
641}
642
643bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
644  MachineInstr *MI = getReturnInstr(MBB);
645  bool IsReturn = (MBB->succ_size() == 0);
646  if (MI)
647    assert(IsReturn);
648  else if (IsReturn)
649    LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
650                      << " is return block without RETURN instr\n";);
651  return  IsReturn;
652}
653
654void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
655    MachineBasicBlock *SrcMBB) {
656  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
657       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
658    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
659}
660
661MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
662  MachineFunction *Func = MBB->getParent();
663  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
664  Func->push_back(NewMBB);  //insert to function
665  for (const MachineInstr &It : *MBB)
666    NewMBB->push_back(Func->CloneMachineInstr(&It));
667  return NewMBB;
668}
669
670void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
671    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
672    MachineBasicBlock *NewBlk) {
673  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
674  if (BranchMI && isCondBranch(BranchMI) &&
675      getTrueBranch(BranchMI) == OldMBB)
676    setTrueBranch(BranchMI, NewBlk);
677}
678
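// Final cleanup of a structurized block: verify that no jump tables remain
// and delete every CONTINUE instruction that immediately precedes an ENDLOOP,
// where it is redundant.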
679void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
680  assert((!MBB->getParent()->getJumpTableInfo()
681          || MBB->getParent()->getJumpTableInfo()->isEmpty())
682         && "found a jump table");
683
684   //collect continue right before endloop
685   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
686   MachineBasicBlock::iterator Pre = MBB->begin();
687   MachineBasicBlock::iterator E = MBB->end();
688   MachineBasicBlock::iterator It = Pre;
689   while (It != E) {
690     if (Pre->getOpcode() == R600::CONTINUE
691         && It->getOpcode() == R600::ENDLOOP)
692       ContInstr.push_back(&*Pre);
693     Pre = It;
694     ++It;
695   }
696
697   //delete continue right before endloop
698   for (unsigned i = 0; i < ContInstr.size(); ++i)
699      ContInstr[i]->eraseFromParent();
700
701   // TODO: fix up the jump table so a later phase won't be confused.  if
702   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
703   // there isn't such an interface yet.  alternatively, replace all the other
704   // blocks in the jump table with the entryBlk //}
705}
706
707bool AMDGPUCFGStructurizer::prepare() {
708  bool Changed = false;
709
710  //FIXME: if not reducible flow graph, make it so ???
711
712  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
713
714  orderBlocks(FuncRep);
715
716  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
717
718  // Add an ExitBlk to loops that don't have one
719  for (MachineLoopInfo::iterator It = MLI->begin(),
720       E = MLI->end(); It != E; ++It) {
721    MachineLoop *LoopRep = (*It);
722    MBBVector ExitingMBBs;
723    LoopRep->getExitingBlocks(ExitingMBBs);
724
725    if (ExitingMBBs.size() == 0) {
726      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
727      if (DummyExitBlk)
728        RetBlks.push_back(DummyExitBlk);
729    }
730  }
731
732  // Remove unconditional branch instr.
733  // Add dummy exit block iff there are multiple returns.
734  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
735       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
736    MachineBasicBlock *MBB = *It;
737    removeUnconditionalBranch(MBB);
738    removeRedundantConditionalBranch(MBB);
739    if (isReturnBlock(MBB)) {
740      RetBlks.push_back(MBB);
741    }
742    assert(MBB->succ_size() <= 2);
743  }
744
745  if (RetBlks.size() >= 2) {
746    addDummyExitBlock(RetBlks);
747    Changed = true;
748  }
749
750  return Changed;
751}
752
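// Drive the structurization to a fixed point: sweep the blocks in OrderedBlks
// one SCC at a time, applying the pattern matchers to every block that is
// still active, and repeat while progress is being made.  The function is
// fully reduced once the entry block has no successors; if a sweep makes no
// progress before that happens, the CFG is irreducible and a fatal error is
// reported.  Retired blocks are erased from the function at the end.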
753bool AMDGPUCFGStructurizer::run() {
754  //Assume reducible CFG...
755  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
756
757#ifdef STRESSTEST
758  //Use the worst block ordering to test the algorithm.
759  ReverseVector(orderedBlks);
760#endif
761
762  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
763  int NumIter = 0;
764  bool Finish = false;
765  MachineBasicBlock *MBB;
766  bool MakeProgress = false;
767  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
768                                        OrderedBlks.end());
769
770  do {
771    ++NumIter;
772    LLVM_DEBUG(dbgs() << "numIter = " << NumIter
773                      << ", numRemainedBlk = " << NumRemainedBlk << "\n";);
774
775    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
776        OrderedBlks.begin();
777    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
778        OrderedBlks.end();
779
780    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
781        It;
782    MachineBasicBlock *SccBeginMBB = nullptr;
783    int SccNumBlk = 0;  // The number of active blocks, init to a
784                        // maximum possible number.
785    int SccNumIter;     // Number of iterations in this SCC.
786
787    while (It != E) {
788      MBB = *It;
789
790      if (!SccBeginMBB) {
791        SccBeginIter = It;
792        SccBeginMBB = MBB;
793        SccNumIter = 0;
794        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
795        LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
796                   dbgs() << "\n";);
797      }
798
799      if (!isRetiredBlock(MBB))
800        patternMatch(MBB);
801
802      ++It;
803
804      bool ContNextScc = true;
805      if (It == E
806          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
807        // Just finish one scc.
808        ++SccNumIter;
809        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
810        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
811          LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
812                            << ", sccNumIter = " << SccNumIter;
813                     dbgs() << " doesn't make any progress\n";);
814          ContNextScc = true;
815        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
816          SccNumBlk = sccRemainedNumBlk;
817          It = SccBeginIter;
818          ContNextScc = false;
819          LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
820                            << ", sccNumIter = " << SccNumIter << '\n';);
821        } else {
822          // Finish the current scc.
823          ContNextScc = true;
824        }
825      } else {
826        // Continue on next component in the current scc.
827        ContNextScc = false;
828      }
829
830      if (ContNextScc)
831        SccBeginMBB = nullptr;
832    } //while, "one iteration" over the function.
833
834    MachineBasicBlock *EntryMBB =
835        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
836    if (EntryMBB->succ_size() == 0) {
837      Finish = true;
838      LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
839    } else {
840      int NewnumRemainedBlk
841        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
842      // consider cloned blocks ??
843      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
844        MakeProgress = true;
845        NumRemainedBlk = NewnumRemainedBlk;
846      } else {
847        MakeProgress = false;
848        LLVM_DEBUG(dbgs() << "No progress\n";);
849      }
850    }
851  } while (!Finish && MakeProgress);
852
853  // Misc wrap up to maintain the consistency of the Function representation.
854  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
855
856  // Detach retired Block, release memory.
857  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
858      It != E; ++It) {
859    if ((*It).second && (*It).second->IsRetired) {
860      assert(((*It).first)->getNumber() != -1);
861      LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
862      (*It).first->eraseFromParent();  //Remove from the parent Function.
863    }
864    delete (*It).second;
865  }
866  BlockInfoMap.clear();
867  LLInfoMap.clear();
868
869  if (!Finish) {
870    LLVM_DEBUG(FuncRep->viewCFG());
871    report_fatal_error("IRREDUCIBLE_CFG");
872  }
873
874  return true;
875}
876
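// Build OrderedBlks by walking the strongly connected components of the CFG
// and record each block's SCC number; blocks left without an SCC number are
// unreachable and are reported.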
877void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
878  int SccNum = 0;
879  MachineBasicBlock *MBB;
880  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
881       ++It, ++SccNum) {
882    const std::vector<MachineBasicBlock *> &SccNext = *It;
883    for (std::vector<MachineBasicBlock *>::const_iterator
884         blockIter = SccNext.begin(), blockEnd = SccNext.end();
885         blockIter != blockEnd; ++blockIter) {
886      MBB = *blockIter;
887      OrderedBlks.push_back(MBB);
888      recordSccnum(MBB, SccNum);
889    }
890  }
891
892  // walk through all the blocks in the function to check for unreachable ones
893  for (auto *MBB : nodes(MF)) {
894    SccNum = getSCCNum(MBB);
895    if (SccNum == INVALIDSCCNUM)
896      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
897  }
898}
899
900int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
901  int NumMatch = 0;
902  int CurMatch;
903
904  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
905
906  while ((CurMatch = patternMatchGroup(MBB)) > 0)
907    NumMatch += CurMatch;
908
909  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
910                    << ", numMatch = " << NumMatch << "\n";);
911
912  return NumMatch;
913}
914
915int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
916  int NumMatch = 0;
917  NumMatch += loopendPatternMatch();
918  NumMatch += serialPatternMatch(MBB);
919  NumMatch += ifPatternMatch(MBB);
920  return NumMatch;
921}
922
923int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
924  if (MBB->succ_size() != 1)
925    return 0;
926
927  MachineBasicBlock *childBlk = *MBB->succ_begin();
928  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
929    return 0;
930
931  mergeSerialBlock(MBB, childBlk);
932  ++numSerialPatternMatch;
933  return 1;
934}
935
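// Try to reduce an if/else region rooted at MBB.  Both successors are first
// reduced recursively, then the landing block is identified for the diamond
// shape, the two triangle shapes (one arm empty), and the detached
// continue/break case; anything else is handed to handleJumpintoIf.  Arms
// that still have extra predecessors are cloned so the region becomes single
// entry, and mergeIfthenelseBlock() finally folds the region into MBB as a
// structured IF/ELSE/ENDIF.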
936int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
937  //two edges
938  if (MBB->succ_size() != 2)
939    return 0;
940  if (hasBackEdge(MBB))
941    return 0;
942  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
943  if (!BranchMI)
944    return 0;
945
946  assert(isCondBranch(BranchMI));
947  int NumMatch = 0;
948
949  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
950  NumMatch += serialPatternMatch(TrueMBB);
951  NumMatch += ifPatternMatch(TrueMBB);
952  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
953  NumMatch += serialPatternMatch(FalseMBB);
954  NumMatch += ifPatternMatch(FalseMBB);
955  MachineBasicBlock *LandBlk;
956  int Cloned = 0;
957
958  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
959  // TODO: Simplify
960  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
961    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
962    // Diamond pattern
963    LandBlk = *TrueMBB->succ_begin();
964  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
965    // Triangle pattern, false is empty
966    LandBlk = FalseMBB;
967    FalseMBB = nullptr;
968  } else if (FalseMBB->succ_size() == 1
969             && *FalseMBB->succ_begin() == TrueMBB) {
970    // Triangle pattern, true is empty
971    // We reverse the predicate to make a triangle with an empty false pattern.
972    std::swap(TrueMBB, FalseMBB);
973    reversePredicateSetter(MBB->end(), *MBB);
974    LandBlk = FalseMBB;
975    FalseMBB = nullptr;
976  } else if (FalseMBB->succ_size() == 1
977             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
978    LandBlk = *FalseMBB->succ_begin();
979  } else if (TrueMBB->succ_size() == 1
980    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
981    LandBlk = *TrueMBB->succ_begin();
982  } else {
983    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
984  }
985
986  // improveSimpleJumpintoIf can handle the case where landBlk == NULL but the
987  // new BB created for landBlk==NULL may introduce a new challenge to the
988  // reduction process.
989  if (LandBlk &&
990      ((TrueMBB && TrueMBB->pred_size() > 1)
991      || (FalseMBB && FalseMBB->pred_size() > 1))) {
992     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
993  }
994
995  if (TrueMBB && TrueMBB->pred_size() > 1) {
996    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
997    ++Cloned;
998  }
999
1000  if (FalseMBB && FalseMBB->pred_size() > 1) {
1001    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1002    ++Cloned;
1003  }
1004
1005  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1006
1007  ++numIfPatternMatch;
1008
1009  numClonedBlock += Cloned;
1010
1011  return 1 + Cloned + NumMatch;
1012}
1013
1014int AMDGPUCFGStructurizer::loopendPatternMatch() {
1015  std::deque<MachineLoop *> NestedLoops;
1016  for (auto &It: *MLI)
1017    for (MachineLoop *ML : depth_first(It))
1018      NestedLoops.push_front(ML);
1019
1020  if (NestedLoops.empty())
1021    return 0;
1022
1023  // Process nested loops outside->inside (we did push_front),
1024  // so a "continue" to an outer loop won't be mistaken for a "break"
1025  // of the current loop.
1026  int Num = 0;
1027  for (MachineLoop *ExaminedLoop : NestedLoops) {
1028    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1029      continue;
1030    LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1031    int NumBreak = mergeLoop(ExaminedLoop);
1032    if (NumBreak == -1)
1033      break;
1034    Num += NumBreak;
1035  }
1036  return Num;
1037}
1038
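// Structurize one natural loop: give every exiting block a BREAK towards the
// common exit block, give every latch a CONTINUE towards the header, reduce
// what is left of the body with the serial/if matchers, and wrap the header
// in WHILELOOP/ENDLOOP.  The header block is then reassigned to the parent
// loop (or removed from MachineLoopInfo) and the loop is marked visited.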
1039int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1040  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1041  MBBVector ExitingMBBs;
1042  LoopRep->getExitingBlocks(ExitingMBBs);
1043  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1044  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1045                    << " exiting blocks\n";);
1046  // We assume a single ExitBlk
1047  MBBVector ExitBlks;
1048  LoopRep->getExitBlocks(ExitBlks);
1049  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1050  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1051    ExitBlkSet.insert(ExitBlks[i]);
1052  assert(ExitBlkSet.size() == 1);
1053  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1054  assert(ExitBlk && "Loop has several exit block");
1055  MBBVector LatchBlks;
1056  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1057    if (LoopRep->contains(LB))
1058      LatchBlks.push_back(LB);
1059
1060  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1061    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1062  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1063    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1064  int Match = 0;
1065  do {
1066    Match = 0;
1067    Match += serialPatternMatch(LoopHeader);
1068    Match += ifPatternMatch(LoopHeader);
1069  } while (Match > 0);
1070  mergeLooplandBlock(LoopHeader, ExitBlk);
1071  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1072  if (ParentLoop)
1073    MLI->changeLoopFor(LoopHeader, ParentLoop);
1074  else
1075    MLI->removeBlock(LoopHeader);
1076  Visited[LoopRep] = true;
1077  return 1;
1078}
1079
1080bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1081    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1082  if (Src1MBB->succ_size() == 0) {
1083    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1084    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1085      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1086      if (TheEntry) {
1087        LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1088                          << Src1MBB->getNumber() << " src2 = BB"
1089                          << Src2MBB->getNumber() << "\n";);
1090        return true;
1091      }
1092    }
1093  }
1094  return false;
1095}
1096
1097int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1098    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1099  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1100  if (Num == 0) {
1101    LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1102                      << "\n";);
1103    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1104  }
1105  return Num;
1106}
1107
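// Walk down the chain of single successors starting at TrueMBB, looking for a
// block that FalseMBB also reaches along a single path.  When such a merge
// point is found, side entries on both paths are cloned away and the
// serial/if matchers are re-run on HeadMBB; returns the number of matches and
// clones performed, or 0 if no merge point exists.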
1108int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1109    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1110  int Num = 0;
1111  MachineBasicBlock *DownBlk;
1112
1113  //trueBlk could be the common post dominator
1114  DownBlk = TrueMBB;
1115
1116  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1117                    << " true = BB" << TrueMBB->getNumber()
1118                    << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1119                    << FalseMBB->getNumber() << "\n";);
1120
1121  while (DownBlk) {
1122    LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1123
1124    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1125      LLVM_DEBUG(dbgs() << " working\n";);
1126
1127      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1128      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1129
1130      numClonedBlock += Num;
1131      Num += serialPatternMatch(*HeadMBB->succ_begin());
1132      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1133      Num += ifPatternMatch(HeadMBB);
1134      assert(Num > 0);
1135
1136      break;
1137    }
1138    LLVM_DEBUG(dbgs() << " not working\n";);
1139    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1140  } // walk down the postDomTree
1141
1142  return Num;
1143}
1144
1145#ifndef NDEBUG
1146void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1147    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1148    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1149  dbgs() << "head = BB" << HeadMBB->getNumber()
1150         << " size = " << HeadMBB->size();
1151  if (Detail) {
1152    dbgs() << "\n";
1153    HeadMBB->print(dbgs());
1154    dbgs() << "\n";
1155  }
1156
1157  if (TrueMBB) {
1158    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1159           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1160    if (Detail) {
1161      dbgs() << "\n";
1162      TrueMBB->print(dbgs());
1163      dbgs() << "\n";
1164    }
1165  }
1166  if (FalseMBB) {
1167    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1168           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1169    if (Detail) {
1170      dbgs() << "\n";
1171      FalseMBB->print(dbgs());
1172      dbgs() << "\n";
1173    }
1174  }
1175  if (LandMBB) {
1176    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1177           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1178    if (Detail) {
1179      dbgs() << "\n";
1180      LandMBB->print(dbgs());
1181      dbgs() << "\n";
1182    }
1183  }
1184
1185  dbgs() << "\n";
1186}
1187#endif
1188
1189int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1190    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1191    MachineBasicBlock **LandMBBPtr) {
1192  bool MigrateTrue = false;
1193  bool MigrateFalse = false;
1194
1195  MachineBasicBlock *LandBlk = *LandMBBPtr;
1196
1197  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1198         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1199
1200  if (TrueMBB == FalseMBB)
1201    return 0;
1202
1203  MigrateTrue = needMigrateBlock(TrueMBB);
1204  MigrateFalse = needMigrateBlock(FalseMBB);
1205
1206  if (!MigrateTrue && !MigrateFalse)
1207    return 0;
1208
1209  // If we need to migrate either trueBlk or falseBlk, also migrate whichever
1210  // of them has more than one predecessor.  Without doing this, its
1211  // predecessor rather than headBlk will have an undefined value in initReg.
1212  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1213    MigrateTrue = true;
1214  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1215    MigrateFalse = true;
1216
1217  LLVM_DEBUG(
1218      dbgs() << "before improveSimpleJumpintoIf: ";
1219      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1220
1221  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1222  //
1223  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1224  //      {initReg = 0; org falseBlk branch }
1225  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1226  //      => org landBlk
1227  //      if landBlk->pred_size() > 2, put the above if-else inside
1228  //      if (initReg !=2) {...}
1229  //
1230  // add initReg = initVal to headBlk
1231
1232  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1233  if (!MigrateTrue || !MigrateFalse) {
1234    // XXX: We have an opportunity here to optimize the "branch into if"
1235    // case.  Branch into if looks like this:
1236    //                        entry
1237    //                       /     |
1238    //           diamond_head       branch_from
1239    //             /      \           |
1240    // diamond_false        diamond_true
1241    //             \      /
1242    //               done
1243    //
1244    // The diamond_head block begins the "if" and the diamond_true block
1245    // is the block being "branched into".
1246    //
1247    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1248    // and if MigrateFalse is true, then FalseBB is the block being
1249    // "branched into"
1250    //
1251    // Here is the pseudo code for how I think the optimization should work:
1252    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1253    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1254    // 3. Move the branch instruction from diamond_head into its own basic
1255    //    block (new_block).
1256    // 4. Add an unconditional branch from diamond_head to new_block
1257    // 5. Replace the branch instruction in branch_from with an unconditional
1258    //    branch to new_block.  If branch_from has multiple predecessors, then
1259    //    we need to replace the True/False block in the branch
1260    //    instruction instead of replacing it.
1261    // 6. Change the condition of the branch instruction in new_block from
1262    //    COND to (COND || GPR0)
1263    //
1264    // In order to insert these MOV instructions, we will need to use the
1265    // RegisterScavenger.  Usually liveness stops being tracked during
1266    // the late machine optimization passes, however if we implement
1267    // bool TargetRegisterInfo::requiresRegisterScavenging(
1268    //                                                const MachineFunction &MF)
1269    // and have it return true, liveness will be tracked correctly
1270    // by generic optimization passes.  We will also need to make sure that
1271    // all of our target-specific passes that run after regalloc and before
1272    // the CFGStructurizer track liveness and we will need to modify this pass
1273    // to correctly track liveness.
1274    //
1275    // After the above changes, the new CFG should look like this:
1276    //                        entry
1277    //                       /     |
1278    //           diamond_head       branch_from
1279    //                       \     /
1280    //                      new_block
1281    //                      /      |
1282    //         diamond_false        diamond_true
1283    //                      \      /
1284    //                        done
1285    //
1286    // Without this optimization, we are forced to duplicate the diamond_true
1287    // block and we will end up with a CFG like this:
1288    //
1289    //                        entry
1290    //                       /     |
1291    //           diamond_head       branch_from
1292    //             /      \                   |
1293    // diamond_false        diamond_true      diamond_true (duplicate)
1294    //             \      /                   |
1295    //               done --------------------|
1296    //
1297    // Duplicating diamond_true can be very costly especially if it has a
1298    // lot of instructions.
1299    return 0;
1300  }
1301
1302  int NumNewBlk = 0;
1303
1304  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1305
1306  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1307  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1308
1309  if (LandBlkHasOtherPred) {
1310    report_fatal_error("Extra register needed to handle CFG");
1311    Register CmpResReg =
1312        HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1313    report_fatal_error("Extra compare instruction needed to handle CFG");
1314    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1315        CmpResReg, DebugLoc());
1316  }
1317
1318  // XXX: We are running this after RA, so creating virtual registers will
1319  // cause an assertion failure in the PostRA scheduling pass.
1320  Register InitReg =
1321      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1322  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1323      DebugLoc());
1324
1325  if (MigrateTrue) {
1326    migrateInstruction(TrueMBB, LandBlk, I);
1327    // need to unconditionally insert the assignment to ensure a path from its
1328    // predecessor rather than headBlk has a valid value in initReg if
1329    // (initVal != 1).
1330    report_fatal_error("Extra register needed to handle CFG");
1331  }
1332  insertInstrBefore(I, R600::ELSE);
1333
1334  if (MigrateFalse) {
1335    migrateInstruction(FalseMBB, LandBlk, I);
1336    // need to unconditionally insert the assignment to ensure a path from its
1337    // predecessor rather than headBlk has a valid value in initReg if
1338    // (initVal != 0).
1339    report_fatal_error("Extra register needed to handle CFG");
1340  }
1341
1342  if (LandBlkHasOtherPred) {
1343    // add endif
1344    insertInstrBefore(I, R600::ENDIF);
1345
1346    // put initReg = 2 to other predecessors of landBlk
1347    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1348         PE = LandBlk->pred_end(); PI != PE; ++PI) {
1349      MachineBasicBlock *MBB = *PI;
1350      if (MBB != TrueMBB && MBB != FalseMBB)
1351        report_fatal_error("Extra register needed to handle CFG");
1352    }
1353  }
1354  LLVM_DEBUG(
1355      dbgs() << "result from improveSimpleJumpintoIf: ";
1356      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1357
1358  // update landBlk
1359  *LandMBBPtr = LandBlk;
1360
1361  return NumNewBlk;
1362}
1363
1364void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1365    MachineBasicBlock *SrcMBB) {
1366  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1367                    << SrcMBB->getNumber() << "\n";);
1368  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1369
1370  DstMBB->removeSuccessor(SrcMBB, true);
1371  cloneSuccessorList(DstMBB, SrcMBB);
1372
1373  removeSuccessor(SrcMBB);
1374  MLI->removeBlock(SrcMBB);
1375  retireBlock(SrcMBB);
1376}
1377
1378void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1379    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1380    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1381  assert (TrueMBB);
1382  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{  ";
1383             if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1384             << "  } else ";
1385             dbgs() << "{  "; if (FalseMBB) {
1386               dbgs() << "BB" << FalseMBB->getNumber();
1387             } dbgs() << "  }\n ";
1388             dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1389               dbgs() << "BB" << LandMBB->getNumber();
1390             } dbgs() << "\n";);
1391
1392  int OldOpcode = BranchMI->getOpcode();
1393  DebugLoc BranchDL = BranchMI->getDebugLoc();
1394
1395//    transform to
1396//    if cond
1397//       trueBlk
1398//    else
1399//       falseBlk
1400//    endif
1401//    landBlk
1402
1403  MachineBasicBlock::iterator I = BranchMI;
1404  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1405      BranchDL);
1406
1407  if (TrueMBB) {
1408    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1409    MBB->removeSuccessor(TrueMBB, true);
1410    if (LandMBB && TrueMBB->succ_size()!=0)
1411      TrueMBB->removeSuccessor(LandMBB, true);
1412    retireBlock(TrueMBB);
1413    MLI->removeBlock(TrueMBB);
1414  }
1415
1416  if (FalseMBB) {
1417    insertInstrBefore(I, R600::ELSE);
1418    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1419                   FalseMBB->end());
1420    MBB->removeSuccessor(FalseMBB, true);
1421    if (LandMBB && FalseMBB->succ_size() != 0)
1422      FalseMBB->removeSuccessor(LandMBB, true);
1423    retireBlock(FalseMBB);
1424    MLI->removeBlock(FalseMBB);
1425  }
1426  insertInstrBefore(I, R600::ENDIF);
1427
1428  BranchMI->eraseFromParent();
1429
1430  if (LandMBB && TrueMBB && FalseMBB)
1431    MBB->addSuccessor(LandMBB);
1432}
1433
1434void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1435    MachineBasicBlock *LandMBB) {
1436  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1437                    << " land = BB" << LandMBB->getNumber() << "\n";);
1438
1439  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1440  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1441  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1442}
1443
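// Turn a loop-exiting block into structured form: the conditional branch is
// replaced by an IF_PREDICATE_SET / BREAK / ENDIF sequence (with the
// predicate reversed first if the taken branch is not the land block), and
// the edge to the land block is removed.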
1444void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1445    MachineBasicBlock *LandMBB) {
1446  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1447                    << ExitingMBB->getNumber() << " land = BB"
1448                    << LandMBB->getNumber() << "\n";);
1449  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1450  assert(BranchMI && isCondBranch(BranchMI));
1451  DebugLoc DL = BranchMI->getDebugLoc();
1452  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1453  MachineBasicBlock::iterator I = BranchMI;
1454  if (TrueBranch != LandMBB)
1455    reversePredicateSetter(I, *I->getParent());
1456  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1457  insertInstrBefore(I, R600::BREAK);
1458  insertInstrBefore(I, R600::ENDIF);
1459  //now branchInst can be erased safely
1460  BranchMI->eraseFromParent();
1461  //now take care of successors, retire blocks
1462  ExitingMBB->removeSuccessor(LandMBB, true);
1463}
1464
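// Turn a loop latch into structured form.  If the latch still ends in a
// conditional branch, it becomes either a single CONTINUE_LOGICAL* (when the
// branch is the last instruction) or an IF ... CONTINUE ... ENDIF sequence;
// if the branch has already been erased, a plain CONTINUE is appended.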
1465void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1466    MachineBasicBlock *ContMBB) {
1467  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1468                    << ContingMBB->getNumber() << ", cont = BB"
1469                    << ContMBB->getNumber() << "\n";);
1470
1471  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1472  if (MI) {
1473    assert(isCondBranch(MI));
1474    MachineBasicBlock::iterator I = MI;
1475    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1476    int OldOpcode = MI->getOpcode();
1477    DebugLoc DL = MI->getDebugLoc();
1478
1479    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1480
1481    if (!UseContinueLogical) {
1482      int BranchOpcode =
1483          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1484          getBranchZeroOpcode(OldOpcode);
1485      insertCondBranchBefore(I, BranchOpcode, DL);
1486      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1487      insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1488      insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1489    } else {
1490      int BranchOpcode =
1491          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1492          getContinueZeroOpcode(OldOpcode);
1493      insertCondBranchBefore(I, BranchOpcode, DL);
1494    }
1495
1496    MI->eraseFromParent();
1497  } else {
1498    // If we've arrived here then we've already erased the branch instruction.
1499    // Travel back up the basic block to find the last reference of our debug
1500    // location; we've just inserted that reference here, so it should be
1501    // representative.  insertEnd to ensure phi-moves, if they exist, go before
1502    // the continue-instr.
1503    insertInstrEnd(ContingMBB, R600::CONTINUE,
1504        getLastDebugLocInBB(ContingMBB));
1505  }
1506}
1507
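// Walk the single-successor path from SrcMBB to DstMBB and clone every block
// on it that has extra predecessors, so the path reached from PreMBB has no
// side entries; returns the number of blocks cloned.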
1508int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1509    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1510  int Cloned = 0;
1511  assert(PreMBB->isSuccessor(SrcMBB));
1512  while (SrcMBB && SrcMBB != DstMBB) {
1513    assert(SrcMBB->succ_size() == 1);
1514    if (SrcMBB->pred_size() > 1) {
1515      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1516      ++Cloned;
1517    }
1518
1519    PreMBB = SrcMBB;
1520    SrcMBB = *SrcMBB->succ_begin();
1521  }
1522
1523  return Cloned;
1524}
1525
1526MachineBasicBlock *
1527AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1528    MachineBasicBlock *PredMBB) {
1529  assert(PredMBB->isSuccessor(MBB) &&
1530         "succBlk is not a predecessor of curBlk");
1531
1532  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
1533  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1534  //srcBlk, oldBlk, newBlk
1535
1536  PredMBB->replaceSuccessor(MBB, CloneMBB);
1537
1538  // add all successor to cloneBlk
1539  cloneSuccessorList(CloneMBB, MBB);
1540
1541  numClonedInstr += MBB->size();
1542
1543  LLVM_DEBUG(dbgs() << "Cloned block: "
1544                    << "BB" << MBB->getNumber() << " size " << MBB->size()
1545                    << "\n";);
1546
1547  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1548
1549  return CloneMBB;
1550}
1551
1552void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1553    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1554  MachineBasicBlock::iterator SpliceEnd;
1555  //look for the input branchinstr, not the AMDGPU branchinstr
1556  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1557  if (!BranchMI) {
1558    LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1559    SpliceEnd = SrcMBB->end();
1560  } else {
1561    LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1562    SpliceEnd = BranchMI;
1563  }
1564  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1565                    << DstMBB->size() << " srcSize = " << SrcMBB->size()
1566                    << "\n";);
1567
1568  //splice insert before insertPos
1569  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1570
1571  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1572                    << DstMBB->size() << " srcSize = " << SrcMBB->size()
1573                    << '\n';);
1574}
1575
1576MachineBasicBlock *
1577AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1578  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1579  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1580
1581  if (!LoopHeader || !LoopLatch)
1582    return nullptr;
1583  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1584  // Is LoopRep an infinite loop ?
1585  if (!BranchMI || !isUncondBranch(BranchMI))
1586    return nullptr;
1587
1588  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1589  FuncRep->push_back(DummyExitBlk);  //insert to function
1590  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1591  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1592  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1593  Ctx.emitError("Extra register needed to handle CFG");
1594  return nullptr;
1595}
1596
1597void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1598  MachineInstr *BranchMI;
1599
1600  // Two unconditional branches have been seen in one basic block, e.g. in
1601  // test_fc_do_while_or.c; fix this upstream so the loop below can be removed.
1602  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1603          && isUncondBranch(BranchMI)) {
1604    LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1605    BranchMI->eraseFromParent();
1606  }
1607}
1608
1609void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1610    MachineBasicBlock *MBB) {
1611  if (MBB->succ_size() != 2)
1612    return;
1613  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1614  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1615  if (MBB1 != MBB2)
1616    return;
1617
1618  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1619  assert(BranchMI && isCondBranch(BranchMI));
1620  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1621  BranchMI->eraseFromParent();
1622  SHOWNEWBLK(MBB1, "Removing redundant successor");
1623  MBB->removeSuccessor(MBB1, true);
1624}
1625
1626void AMDGPUCFGStructurizer::addDummyExitBlock(
1627    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1628  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1629  FuncRep->push_back(DummyExitBlk);  //insert to function
1630  insertInstrEnd(DummyExitBlk, R600::RETURN);
1631
1632  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1633       E = RetMBB.end(); It != E; ++It) {
1634    MachineBasicBlock *MBB = *It;
1635    MachineInstr *MI = getReturnInstr(MBB);
1636    if (MI)
1637      MI->eraseFromParent();
1638    MBB->addSuccessor(DummyExitBlk);
1639    LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1640                      << " successors\n";);
1641  }
1642  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1643}
1644
1645void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1646  while (MBB->succ_size())
1647    MBB->removeSuccessor(*MBB->succ_begin());
1648}
1649
1650void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1651    int SccNum) {
1652  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1653  if (!srcBlkInfo)
1654    srcBlkInfo = new BlockInformation();
1655  srcBlkInfo->SccNum = SccNum;
1656}
1657
1658void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1659  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1660
1661  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1662
1663  if (!SrcBlkInfo)
1664    SrcBlkInfo = new BlockInformation();
1665
1666  SrcBlkInfo->IsRetired = true;
1667  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1668         && "can't retire block yet");
1669}
1670
1671INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1672                      "AMDGPU CFG Structurizer", false, false)
1673INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1674INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1675INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1676INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1677                      "AMDGPU CFG Structurizer", false, false)
1678
1679FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1680  return new AMDGPUCFGStructurizer();
1681}
1682