1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8/// \file
9//==-----------------------------------------------------------------------===//
10
11#define DEBUGME 0
12#define DEBUG_TYPE "structcfg"
13
14#include "AMDGPUInstrInfo.h"
15#include "AMDIL.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineFunctionAnalysis.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineJumpTableInfo.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachinePostDominators.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/Target/TargetInstrInfo.h"
31
32using namespace llvm;
33
34// TODO: move-begin.
35
36//===----------------------------------------------------------------------===//
37//
38// Statistics for CFGStructurizer.
39//
40//===----------------------------------------------------------------------===//
41
42STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
43    "matched");
44STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
45    "matched");
46STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
47    "pattern matched");
48STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
49    "pattern matched");
50STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
51    "matched");
52STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
53STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
54
55//===----------------------------------------------------------------------===//
56//
57// Miscellaneous utility for CFGStructurizer.
58//
59//===----------------------------------------------------------------------===//
60namespace llvmCFGStruct {
61#define SHOWNEWINSTR(i) \
62  if (DEBUGME) errs() << "New instr: " << *i << "\n"
63
64#define SHOWNEWBLK(b, msg) \
65if (DEBUGME) { \
66  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
67  errs() << "\n"; \
68}
69
70#define SHOWBLK_DETAIL(b, msg) \
71if (DEBUGME) { \
72  if (b) { \
73  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
74  b->print(errs()); \
75  errs() << "\n"; \
76  } \
77}
78
79#define INVALIDSCCNUM -1
80#define INVALIDREGNUM 0
81
82template<class LoopinfoT>
83void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
84  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
85       iterEnd = LoopInfo.end();
86       iter != iterEnd; ++iter) {
87    (*iter)->print(OS, 0);
88  }
89}
90
91template<class NodeT>
92void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
93  size_t sz = Src.size();
94  for (size_t i = 0; i < sz/2; ++i) {
95    NodeT *t = Src[i];
96    Src[i] = Src[sz - i - 1];
97    Src[sz - i - 1] = t;
98  }
99}
100
101} //end namespace llvmCFGStruct
102
103//===----------------------------------------------------------------------===//
104//
105// supporting data structure for CFGStructurizer
106//
107//===----------------------------------------------------------------------===//
108
109namespace llvmCFGStruct {
110template<class PassT>
111struct CFGStructTraits {
112};
113
114template <class InstrT>
115class BlockInformation {
116public:
117  bool isRetired;
118  int  sccNum;
119  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
120  //Instructions defining the corresponding successor.
121  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
122};
123
124template <class BlockT, class InstrT, class RegiT>
125class LandInformation {
126public:
127  BlockT *landBlk;
128  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
129                                  //WHILELOOP(thisloop) init before entering
130                                  //thisloop.
131  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
132                                  //WHILELOOP(thisloop) init after entering
133                                  //thisloop.
134  std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
135                                     //land block, branch cond on this reg.
136  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
137                                     //endif" after ENDLOOP(thisloop) break
138                                     //outerLoopOf(thisLoop).
139  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
140                                    //endif" after ENDLOOP(thisloop) continue on
141                                    //outerLoopOf(thisLoop).
142  LandInformation() : landBlk(NULL) {}
143};
144
145} //end of namespace llvmCFGStruct
146
147//===----------------------------------------------------------------------===//
148//
149// CFGStructurizer
150//
151//===----------------------------------------------------------------------===//
152
153namespace llvmCFGStruct {
154// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
155template<class PassT>
156class  CFGStructurizer {
157public:
158  typedef enum {
159    Not_SinglePath = 0,
160    SinglePath_InPath = 1,
161    SinglePath_NotInPath = 2
162  } PathToKind;
163
164public:
165  typedef typename PassT::InstructionType         InstrT;
166  typedef typename PassT::FunctionType            FuncT;
167  typedef typename PassT::DominatortreeType       DomTreeT;
168  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
169  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
170  typedef typename PassT::LoopinfoType            LoopInfoT;
171
172  typedef GraphTraits<FuncT *>                    FuncGTraits;
173  //typedef FuncGTraits::nodes_iterator BlockIterator;
174  typedef typename FuncT::iterator                BlockIterator;
175
176  typedef typename FuncGTraits::NodeType          BlockT;
177  typedef GraphTraits<BlockT *>                   BlockGTraits;
178  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
179  //typedef BlockGTraits::succ_iterator InstructionIterator;
180  typedef typename BlockT::iterator               InstrIterator;
181
182  typedef CFGStructTraits<PassT>                  CFGTraits;
183  typedef BlockInformation<InstrT>                BlockInfo;
184  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
185
186  typedef int                                     RegiT;
187  typedef typename PassT::LoopType                LoopT;
188  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
189        typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
190        //landing info for loop break
191  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
192
193public:
194  CFGStructurizer();
195  ~CFGStructurizer();
196
197  /// Perform the CFG structurization
198  bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
199
200  /// Perform the CFG preparation
201  bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
202
203private:
204  void reversePredicateSetter(typename BlockT::iterator);
205  void   orderBlocks();
206  void   printOrderedBlocks(llvm::raw_ostream &OS);
207  int patternMatch(BlockT *CurBlock);
208  int patternMatchGroup(BlockT *CurBlock);
209
210  int serialPatternMatch(BlockT *CurBlock);
211  int ifPatternMatch(BlockT *CurBlock);
212  int switchPatternMatch(BlockT *CurBlock);
213  int loopendPatternMatch(BlockT *CurBlock);
214  int loopPatternMatch(BlockT *CurBlock);
215
216  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
217  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
218  //int loopWithoutBreak(BlockT *);
219
220  void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
221                        BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
222  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
223                           BlockT *ContBlock, LoopT *contLoop);
224  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
225  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
226                       BlockT *FalseBlock);
227  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
228                          BlockT *FalseBlock);
229  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
230                              BlockT *FalseBlock, BlockT **LandBlockPtr);
231  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
232                                   BlockT *FalseBlock, BlockT *LandBlock,
233                                   bool Detail = false);
234  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
235                          bool AllowSideEntry = true);
236  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
237                        bool AllowSideEntry = true);
238  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
239  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
240
241  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
242                            BlockT *TrueBlock, BlockT *FalseBlock,
243                            BlockT *LandBlock);
244  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
245  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
246                           BlockT *ExitLandBlock, RegiT SetReg);
247  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
248                           RegiT SetReg);
249  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
250                                std::set<BlockT*> &ExitBlockSet,
251                                BlockT *ExitLandBlk);
252  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
253                                BlockTSmallerVector &ExitingBlocks,
254                                BlockTSmallerVector &ExitBlocks);
255  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
256  void removeUnconditionalBranch(BlockT *SrcBlock);
257  void removeRedundantConditionalBranch(BlockT *SrcBlock);
258  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
259
260  void removeSuccessor(BlockT *SrcBlock);
261  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
262  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
263
264  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
265                          InstrIterator InsertPos);
266
267  void recordSccnum(BlockT *SrcBlock, int SCCNum);
268  int getSCCNum(BlockT *srcBlk);
269
270  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
271  bool isRetiredBlock(BlockT *SrcBlock);
272  bool isActiveLoophead(BlockT *CurBlock);
273  bool needMigrateBlock(BlockT *Block);
274
275  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
276                              BlockTSmallerVector &exitBlocks,
277                              std::set<BlockT*> &ExitBlockSet);
278  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
279  BlockT *getLoopLandBlock(LoopT *LoopRep);
280  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
281
282  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
283  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
284  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
285  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
286  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
287
288  bool hasBackEdge(BlockT *curBlock);
289  unsigned getLoopDepth  (LoopT *LoopRep);
290  int countActiveBlock(
291    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
292    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
293    BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
294  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
295
296private:
297  DomTreeT *domTree;
298  PostDomTreeT *postDomTree;
299  LoopInfoT *loopInfo;
300  PassT *passRep;
301  FuncT *funcRep;
302
303  BlockInfoMap blockInfoMap;
304  LoopLandInfoMap loopLandInfoMap;
305  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
306  const AMDGPURegisterInfo *TRI;
307
308};  //template class CFGStructurizer
309
310template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
311  : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
312}
313
314template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
315  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
316       E = blockInfoMap.end(); I != E; ++I) {
317    delete I->second;
318  }
319}
320
321template<class PassT>
322bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
323                                     const AMDGPURegisterInfo * tri) {
324  passRep = &pass;
325  funcRep = &func;
326  TRI = tri;
327
328  bool changed = false;
329
330  //FIXME: if not reducible flow graph, make it so ???
331
332  if (DEBUGME) {
333        errs() << "AMDGPUCFGStructurizer::prepare\n";
334  }
335
336  loopInfo = CFGTraits::getLoopInfo(pass);
337  if (DEBUGME) {
338    errs() << "LoopInfo:\n";
339    PrintLoopinfo(*loopInfo, errs());
340  }
341
342  orderBlocks();
343  if (DEBUGME) {
344    errs() << "Ordered blocks:\n";
345    printOrderedBlocks(errs());
346  }
347
348  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
349
350  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
351       iterEnd = loopInfo->end();
352       iter != iterEnd; ++iter) {
353    LoopT* loopRep = (*iter);
354    BlockTSmallerVector exitingBlks;
355    loopRep->getExitingBlocks(exitingBlks);
356
357    if (exitingBlks.size() == 0) {
358      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
359      if (dummyExitBlk != NULL)
360        retBlks.push_back(dummyExitBlk);
361    }
362  }
363
364  // Remove unconditional branch instr.
365  // Add dummy exit block iff there are multiple returns.
366
367  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
368       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
369       iterBlk != iterEndBlk;
370       ++iterBlk) {
371    BlockT *curBlk = *iterBlk;
372    removeUnconditionalBranch(curBlk);
373    removeRedundantConditionalBranch(curBlk);
374    if (CFGTraits::isReturnBlock(curBlk)) {
375      retBlks.push_back(curBlk);
376    }
377    assert(curBlk->succ_size() <= 2);
378  } //for
379
380  if (retBlks.size() >= 2) {
381    addDummyExitBlock(retBlks);
382    changed = true;
383  }
384
385  return changed;
386} //CFGStructurizer::prepare
387
388template<class PassT>
389bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
390    const AMDGPURegisterInfo * tri) {
391  passRep = &pass;
392  funcRep = &func;
393  TRI = tri;
394
395  //Assume reducible CFG...
396  if (DEBUGME) {
397    errs() << "AMDGPUCFGStructurizer::run\n";
398    func.viewCFG();
399  }
400
401  domTree = CFGTraits::getDominatorTree(pass);
402  if (DEBUGME) {
403    domTree->print(errs(), (const llvm::Module*)0);
404  }
405
406  postDomTree = CFGTraits::getPostDominatorTree(pass);
407  if (DEBUGME) {
408    postDomTree->print(errs());
409  }
410
411  loopInfo = CFGTraits::getLoopInfo(pass);
412  if (DEBUGME) {
413    errs() << "LoopInfo:\n";
414    PrintLoopinfo(*loopInfo, errs());
415  }
416
417  orderBlocks();
418#ifdef STRESSTEST
419  //Use the worse block ordering to test the algorithm.
420  ReverseVector(orderedBlks);
421#endif
422
423  if (DEBUGME) {
424    errs() << "Ordered blocks:\n";
425    printOrderedBlocks(errs());
426  }
427  int numIter = 0;
428  bool finish = false;
429  BlockT *curBlk;
430  bool makeProgress = false;
431  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
432                                        orderedBlks.end());
433
434  do {
435    ++numIter;
436    if (DEBUGME) {
437      errs() << "numIter = " << numIter
438             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
439    }
440
441    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
442      iterBlk = orderedBlks.begin();
443    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
444      iterBlkEnd = orderedBlks.end();
445
446    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
447      sccBeginIter = iterBlk;
448    BlockT *sccBeginBlk = NULL;
449    int sccNumBlk = 0;  // The number of active blocks, init to a
450                        // maximum possible number.
451    int sccNumIter;     // Number of iteration in this SCC.
452
453    while (iterBlk != iterBlkEnd) {
454      curBlk = *iterBlk;
455
456      if (sccBeginBlk == NULL) {
457        sccBeginIter = iterBlk;
458        sccBeginBlk = curBlk;
459        sccNumIter = 0;
460        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
461        if (DEBUGME) {
462              errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
463              errs() << "\n";
464        }
465      }
466
467      if (!isRetiredBlock(curBlk)) {
468        patternMatch(curBlk);
469      }
470
471      ++iterBlk;
472
473      bool contNextScc = true;
474      if (iterBlk == iterBlkEnd
475          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
476        // Just finish one scc.
477        ++sccNumIter;
478        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
479        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
480          if (DEBUGME) {
481            errs() << "Can't reduce SCC " << getSCCNum(curBlk)
482                   << ", sccNumIter = " << sccNumIter;
483            errs() << "doesn't make any progress\n";
484          }
485          contNextScc = true;
486        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
487          sccNumBlk = sccRemainedNumBlk;
488          iterBlk = sccBeginIter;
489          contNextScc = false;
490          if (DEBUGME) {
491            errs() << "repeat processing SCC" << getSCCNum(curBlk)
492                   << "sccNumIter = " << sccNumIter << "\n";
493            func.viewCFG();
494          }
495        } else {
496          // Finish the current scc.
497          contNextScc = true;
498        }
499      } else {
500        // Continue on next component in the current scc.
501        contNextScc = false;
502      }
503
504      if (contNextScc) {
505        sccBeginBlk = NULL;
506      }
507    } //while, "one iteration" over the function.
508
509    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
510    if (entryBlk->succ_size() == 0) {
511      finish = true;
512      if (DEBUGME) {
513        errs() << "Reduce to one block\n";
514      }
515    } else {
516      int newnumRemainedBlk
517        = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
518      // consider cloned blocks ??
519      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
520        makeProgress = true;
521        numRemainedBlk = newnumRemainedBlk;
522      } else {
523        makeProgress = false;
524        if (DEBUGME) {
525          errs() << "No progress\n";
526        }
527      }
528    }
529  } while (!finish && makeProgress);
530
531  // Misc wrap up to maintain the consistency of the Function representation.
532  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
533
534  // Detach retired Block, release memory.
535  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
536       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
537    if ((*iterMap).second && (*iterMap).second->isRetired) {
538      assert(((*iterMap).first)->getNumber() != -1);
539      if (DEBUGME) {
540        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
541      }
542      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
543    }
544    delete (*iterMap).second;
545  }
546  blockInfoMap.clear();
547
548  // clear loopLandInfoMap
549  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
550       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
551    delete (*iterMap).second;
552  }
553  loopLandInfoMap.clear();
554
555  if (DEBUGME) {
556    func.viewCFG();
557  }
558
559  if (!finish) {
560    assert(!"IRREDUCIBL_CF");
561  }
562
563  return true;
564} //CFGStructurizer::run
565
566/// Print the ordered Blocks.
567///
568template<class PassT>
569void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
570  size_t i = 0;
571  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
572      iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
573       iterBlk != iterBlkEnd;
574       ++iterBlk, ++i) {
575    os << "BB" << (*iterBlk)->getNumber();
576    os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
577    if (i != 0 && i % 10 == 0) {
578      os << "\n";
579    } else {
580      os << " ";
581    }
582  }
583} //printOrderedBlocks
584
585/// Compute the reversed DFS post order of Blocks
586///
587template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
588  int sccNum = 0;
589  BlockT *bb;
590  for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
591       sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
592    std::vector<BlockT *> &sccNext = *sccIter;
593    for (typename std::vector<BlockT *>::const_iterator
594         blockIter = sccNext.begin(), blockEnd = sccNext.end();
595         blockIter != blockEnd; ++blockIter) {
596      bb = *blockIter;
597      orderedBlks.push_back(bb);
598      recordSccnum(bb, sccNum);
599    }
600  }
601
602  //walk through all the block in func to check for unreachable
603  for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
604       blockEnd1 = FuncGTraits::nodes_end(funcRep);
605       blockIter1 != blockEnd1; ++blockIter1) {
606    BlockT *bb = &(*blockIter1);
607    sccNum = getSCCNum(bb);
608    if (sccNum == INVALIDSCCNUM) {
609      errs() << "unreachable block BB" << bb->getNumber() << "\n";
610    }
611  }
612} //orderBlocks
613
614template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
615  int numMatch = 0;
616  int curMatch;
617
618  if (DEBUGME) {
619        errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
620  }
621
622  while ((curMatch = patternMatchGroup(curBlk)) > 0) {
623    numMatch += curMatch;
624  }
625
626  if (DEBUGME) {
627        errs() << "End patternMatch BB" << curBlk->getNumber()
628      << ", numMatch = " << numMatch << "\n";
629  }
630
631  return numMatch;
632} //patternMatch
633
634template<class PassT>
635int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
636  int numMatch = 0;
637  numMatch += serialPatternMatch(curBlk);
638  numMatch += ifPatternMatch(curBlk);
639  numMatch += loopendPatternMatch(curBlk);
640  numMatch += loopPatternMatch(curBlk);
641  return numMatch;
642}//patternMatchGroup
643
644template<class PassT>
645int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
646  if (curBlk->succ_size() != 1) {
647    return 0;
648  }
649
650  BlockT *childBlk = *curBlk->succ_begin();
651  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
652    return 0;
653  }
654
655  mergeSerialBlock(curBlk, childBlk);
656  ++numSerialPatternMatch;
657  return 1;
658} //serialPatternMatch
659
660template<class PassT>
661int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
662  //two edges
663  if (curBlk->succ_size() != 2) {
664    return 0;
665  }
666
667  if (hasBackEdge(curBlk)) {
668    return 0;
669  }
670
671  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
672  if (branchInstr == NULL) {
673    return 0;
674  }
675
676  assert(CFGTraits::isCondBranch(branchInstr));
677
678  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
679  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
680  BlockT *landBlk;
681  int cloned = 0;
682
683  // TODO: Simplify
684  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
685    && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
686    landBlk = *trueBlk->succ_begin();
687  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
688    landBlk = NULL;
689  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
690    landBlk = falseBlk;
691    falseBlk = NULL;
692  } else if (falseBlk->succ_size() == 1
693             && *falseBlk->succ_begin() == trueBlk) {
694    landBlk = trueBlk;
695    trueBlk = NULL;
696  } else if (falseBlk->succ_size() == 1
697             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
698    landBlk = *falseBlk->succ_begin();
699  } else if (trueBlk->succ_size() == 1
700    && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
701    landBlk = *trueBlk->succ_begin();
702  } else {
703    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
704  }
705
706  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
707  // new BB created for landBlk==NULL may introduce new challenge to the
708  // reduction process.
709  if (landBlk != NULL &&
710      ((trueBlk && trueBlk->pred_size() > 1)
711      || (falseBlk && falseBlk->pred_size() > 1))) {
712     cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
713  }
714
715  if (trueBlk && trueBlk->pred_size() > 1) {
716    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
717    ++cloned;
718  }
719
720  if (falseBlk && falseBlk->pred_size() > 1) {
721    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
722    ++cloned;
723  }
724
725  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
726
727  ++numIfPatternMatch;
728
729  numClonedBlock += cloned;
730
731  return 1 + cloned;
732} //ifPatternMatch
733
734template<class PassT>
735int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
736  return 0;
737} //switchPatternMatch
738
739template<class PassT>
740int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
741  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
742  typename std::vector<LoopT *> nestedLoops;
743  while (loopRep) {
744    nestedLoops.push_back(loopRep);
745    loopRep = loopRep->getParentLoop();
746  }
747
748  if (nestedLoops.size() == 0) {
749    return 0;
750  }
751
752  // Process nested loop outside->inside, so "continue" to a outside loop won't
753  // be mistaken as "break" of the current loop.
754  int num = 0;
755  for (typename std::vector<LoopT *>::reverse_iterator
756       iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
757       iter != iterEnd; ++iter) {
758    loopRep = *iter;
759
760    if (getLoopLandBlock(loopRep) != NULL) {
761      continue;
762    }
763
764    BlockT *loopHeader = loopRep->getHeader();
765
766    int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
767
768    if (numBreak == -1) {
769      break;
770    }
771
772    int numCont = loopcontPatternMatch(loopRep, loopHeader);
773    num += numBreak + numCont;
774  }
775
776  return num;
777} //loopendPatternMatch
778
779template<class PassT>
780int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
781  if (curBlk->succ_size() != 0) {
782    return 0;
783  }
784
785  int numLoop = 0;
786  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
787  while (loopRep && loopRep->getHeader() == curBlk) {
788    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
789    if (loopLand) {
790      BlockT *landBlk = loopLand->landBlk;
791      assert(landBlk);
792      if (!isRetiredBlock(landBlk)) {
793        mergeLooplandBlock(curBlk, loopLand);
794        ++numLoop;
795      }
796    }
797    loopRep = loopRep->getParentLoop();
798  }
799
800  numLoopPatternMatch += numLoop;
801
802  return numLoop;
803} //loopPatternMatch
804
805template<class PassT>
806int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
807                                                  BlockT *loopHeader) {
808  BlockTSmallerVector exitingBlks;
809  loopRep->getExitingBlocks(exitingBlks);
810
811  if (DEBUGME) {
812    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
813  }
814
815  if (exitingBlks.size() == 0) {
816    setLoopLandBlock(loopRep);
817    return 0;
818  }
819
820  // Compute the corresponding exitBlks and exit block set.
821  BlockTSmallerVector exitBlks;
822  std::set<BlockT *> exitBlkSet;
823  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
824       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
825    BlockT *exitingBlk = *iter;
826    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
827    exitBlks.push_back(exitBlk);
828    exitBlkSet.insert(exitBlk);  //non-duplicate insert
829  }
830
831  assert(exitBlkSet.size() > 0);
832  assert(exitBlks.size() == exitingBlks.size());
833
834  if (DEBUGME) {
835    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
836  }
837
838  // Find exitLandBlk.
839  BlockT *exitLandBlk = NULL;
840  int numCloned = 0;
841  int numSerial = 0;
842
843  if (exitBlkSet.size() == 1) {
844    exitLandBlk = *exitBlkSet.begin();
845  } else {
846    exitLandBlk = findNearestCommonPostDom(exitBlkSet);
847
848    if (exitLandBlk == NULL) {
849      return -1;
850    }
851
852    bool allInPath = true;
853    bool allNotInPath = true;
854    for (typename std::set<BlockT*>::const_iterator
855         iter = exitBlkSet.begin(),
856         iterEnd = exitBlkSet.end();
857         iter != iterEnd; ++iter) {
858      BlockT *exitBlk = *iter;
859
860      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
861      if (DEBUGME) {
862        errs() << "BB" << exitBlk->getNumber()
863               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
864               << pathKind << "\n";
865      }
866
867      allInPath = allInPath && (pathKind == SinglePath_InPath);
868      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
869
870      if (!allInPath && !allNotInPath) {
871        if (DEBUGME) {
872              errs() << "singlePath check fail\n";
873        }
874        return -1;
875      }
876    } // check all exit blocks
877
878    if (allNotInPath) {
879
880      // TODO: Simplify, maybe separate function?
881      LoopT *parentLoopRep = loopRep->getParentLoop();
882      BlockT *parentLoopHeader = NULL;
883      if (parentLoopRep)
884        parentLoopHeader = parentLoopRep->getHeader();
885
886      if (exitLandBlk == parentLoopHeader &&
887          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
888                                               loopRep,
889                                               exitBlkSet,
890                                               exitLandBlk)) != NULL) {
891        if (DEBUGME) {
892          errs() << "relocateLoopcontBlock success\n";
893        }
894      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
895                                                      exitingBlks,
896                                                      exitBlks)) != NULL) {
897        if (DEBUGME) {
898          errs() << "insertEndbranchBlock success\n";
899        }
900      } else {
901        if (DEBUGME) {
902          errs() << "loop exit fail\n";
903        }
904        return -1;
905      }
906    }
907
908    // Handle side entry to exit path.
909    exitBlks.clear();
910    exitBlkSet.clear();
911    for (typename BlockTSmallerVector::iterator iterExiting =
912           exitingBlks.begin(),
913         iterExitingEnd = exitingBlks.end();
914         iterExiting != iterExitingEnd; ++iterExiting) {
915      BlockT *exitingBlk = *iterExiting;
916      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
917      BlockT *newExitBlk = exitBlk;
918
919      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
920        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
921        ++numCloned;
922      }
923
924      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
925
926      exitBlks.push_back(newExitBlk);
927      exitBlkSet.insert(newExitBlk);
928    }
929
930    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
931         iterExitEnd = exitBlks.end();
932         iterExit != iterExitEnd; ++iterExit) {
933      BlockT *exitBlk = *iterExit;
934      numSerial += serialPatternMatch(exitBlk);
935    }
936
937    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
938         iterExitEnd = exitBlks.end();
939         iterExit != iterExitEnd; ++iterExit) {
940      BlockT *exitBlk = *iterExit;
941      if (exitBlk->pred_size() > 1) {
942        if (exitBlk != exitLandBlk) {
943          return -1;
944        }
945      } else {
946        if (exitBlk != exitLandBlk &&
947            (exitBlk->succ_size() != 1 ||
948            *exitBlk->succ_begin() != exitLandBlk)) {
949          return -1;
950        }
951      }
952    }
953  } // else
954
955  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
956
957  // Fold break into the breaking block. Leverage across level breaks.
958  assert(exitingBlks.size() == exitBlks.size());
959  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
960       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
961       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
962    BlockT *exitBlk = *iterExit;
963    BlockT *exitingBlk = *iterExiting;
964    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
965    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
966    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
967  }
968
969  int numBreak = static_cast<int>(exitingBlks.size());
970  numLoopbreakPatternMatch += numBreak;
971  numClonedBlock += numCloned;
972  return numBreak + numSerial + numCloned;
973} //loopbreakPatternMatch
974
975template<class PassT>
976int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
977                                                 BlockT *loopHeader) {
978  int numCont = 0;
979  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
980  for (typename InvBlockGTraits::ChildIteratorType iter =
981       InvBlockGTraits::child_begin(loopHeader),
982       iterEnd = InvBlockGTraits::child_end(loopHeader);
983       iter != iterEnd; ++iter) {
984    BlockT *curBlk = *iter;
985    if (loopRep->contains(curBlk)) {
986      handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
987                          loopHeader, loopRep);
988      contBlk.push_back(curBlk);
989      ++numCont;
990    }
991  }
992
993  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
994       iter = contBlk.begin(), iterEnd = contBlk.end();
995       iter != iterEnd; ++iter) {
996    (*iter)->removeSuccessor(loopHeader);
997  }
998
999  numLoopcontPatternMatch += numCont;
1000
1001  return numCont;
1002} //loopcontPatternMatch
1003
1004
1005template<class PassT>
1006bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1007                                                         BlockT *src2Blk) {
1008  // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1009  // same loop with LoopLandInfo without explicitly keeping track of
1010  // loopContBlks and loopBreakBlks, this is a method to get the information.
1011  //
1012  if (src1Blk->succ_size() == 0) {
1013    LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1014    if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1015      LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1016      if (theEntry != NULL) {
1017        if (DEBUGME) {
1018          errs() << "isLoopContBreakBlock yes src1 = BB"
1019                 << src1Blk->getNumber()
1020                 << " src2 = BB" << src2Blk->getNumber() << "\n";
1021        }
1022        return true;
1023      }
1024    }
1025  }
1026  return false;
1027}  //isSameloopDetachedContbreak
1028
1029template<class PassT>
1030int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1031                                             BlockT *trueBlk,
1032                                             BlockT *falseBlk) {
1033  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1034  if (num == 0) {
1035    if (DEBUGME) {
1036      errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1037    }
1038    num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1039  }
1040  return num;
1041}
1042
1043template<class PassT>
1044int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1045                                                BlockT *trueBlk,
1046                                                BlockT *falseBlk) {
1047  int num = 0;
1048  BlockT *downBlk;
1049
1050  //trueBlk could be the common post dominator
1051  downBlk = trueBlk;
1052
1053  if (DEBUGME) {
1054    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1055           << " true = BB" << trueBlk->getNumber()
1056           << ", numSucc=" << trueBlk->succ_size()
1057           << " false = BB" << falseBlk->getNumber() << "\n";
1058  }
1059
1060  while (downBlk) {
1061    if (DEBUGME) {
1062      errs() << "check down = BB" << downBlk->getNumber();
1063    }
1064
1065    if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1066      if (DEBUGME) {
1067        errs() << " working\n";
1068      }
1069
1070      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1071      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1072
1073      numClonedBlock += num;
1074      num += serialPatternMatch(*headBlk->succ_begin());
1075      num += serialPatternMatch(*(++headBlk->succ_begin()));
1076      num += ifPatternMatch(headBlk);
1077      assert(num > 0);
1078
1079      break;
1080    }
1081    if (DEBUGME) {
1082      errs() << " not working\n";
1083    }
1084    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1085  } // walk down the postDomTree
1086
1087  return num;
1088} //handleJumpintoIf
1089
1090template<class PassT>
1091void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1092                                                         BlockT *trueBlk,
1093                                                         BlockT *falseBlk,
1094                                                         BlockT *landBlk,
1095                                                         bool detail) {
1096  errs() << "head = BB" << headBlk->getNumber()
1097         << " size = " << headBlk->size();
1098  if (detail) {
1099    errs() << "\n";
1100    headBlk->print(errs());
1101    errs() << "\n";
1102  }
1103
1104  if (trueBlk) {
1105    errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1106           << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1107    if (detail) {
1108      errs() << "\n";
1109      trueBlk->print(errs());
1110      errs() << "\n";
1111    }
1112  }
1113  if (falseBlk) {
1114    errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1115           << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1116    if (detail) {
1117      errs() << "\n";
1118      falseBlk->print(errs());
1119      errs() << "\n";
1120    }
1121  }
1122  if (landBlk) {
1123    errs() << ", land = BB" << landBlk->getNumber() << " size = "
1124           << landBlk->size() << " numPred = " << landBlk->pred_size();
1125    if (detail) {
1126      errs() << "\n";
1127      landBlk->print(errs());
1128      errs() << "\n";
1129    }
1130  }
1131
1132    errs() << "\n";
1133} //showImproveSimpleJumpintoIf
1134
1135template<class PassT>
1136int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1137                                                    BlockT *trueBlk,
1138                                                    BlockT *falseBlk,
1139                                                    BlockT **plandBlk) {
1140  bool migrateTrue = false;
1141  bool migrateFalse = false;
1142
1143  BlockT *landBlk = *plandBlk;
1144
1145  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1146         && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1147
1148  if (trueBlk == falseBlk) {
1149    return 0;
1150  }
1151
1152  migrateTrue = needMigrateBlock(trueBlk);
1153  migrateFalse = needMigrateBlock(falseBlk);
1154
1155  if (!migrateTrue && !migrateFalse) {
1156    return 0;
1157  }
1158
1159  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1160  // have more than one predecessors.  without doing this, its predecessor
1161  // rather than headBlk will have undefined value in initReg.
1162  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1163    migrateTrue = true;
1164  }
1165  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1166    migrateFalse = true;
1167  }
1168
1169  if (DEBUGME) {
1170    errs() << "before improveSimpleJumpintoIf: ";
1171    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1172  }
1173
1174  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1175  //
1176  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1177  //      {initReg = 0; org falseBlk branch }
1178  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1179  //      => org landBlk
1180  //      if landBlk->pred_size() > 2, put the about if-else inside
1181  //      if (initReg !=2) {...}
1182  //
1183  // add initReg = initVal to headBlk
1184
1185  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1186  unsigned initReg =
1187    funcRep->getRegInfo().createVirtualRegister(I32RC);
1188  if (!migrateTrue || !migrateFalse) {
1189    int initVal = migrateTrue ? 0 : 1;
1190    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1191  }
1192
1193  int numNewBlk = 0;
1194
1195  if (landBlk == NULL) {
1196    landBlk = funcRep->CreateMachineBasicBlock();
1197    funcRep->push_back(landBlk);  //insert to function
1198
1199    if (trueBlk) {
1200      trueBlk->addSuccessor(landBlk);
1201    } else {
1202      headBlk->addSuccessor(landBlk);
1203    }
1204
1205    if (falseBlk) {
1206      falseBlk->addSuccessor(landBlk);
1207    } else {
1208      headBlk->addSuccessor(landBlk);
1209    }
1210
1211    numNewBlk ++;
1212  }
1213
1214  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1215
1216  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1217  typename BlockT::iterator insertPos =
1218    CFGTraits::getInstrPos
1219    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1220
1221  if (landBlkHasOtherPred) {
1222    unsigned immReg =
1223      funcRep->getRegInfo().createVirtualRegister(I32RC);
1224    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1225    unsigned cmpResReg =
1226      funcRep->getRegInfo().createVirtualRegister(I32RC);
1227
1228    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1229                                        initReg, immReg);
1230    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1231                                      AMDGPU::IF_PREDICATE_SET, passRep,
1232                                      cmpResReg, DebugLoc());
1233  }
1234
1235  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
1236                                    passRep, initReg, DebugLoc());
1237
1238  if (migrateTrue) {
1239    migrateInstruction(trueBlk, landBlk, insertPos);
1240    // need to uncondionally insert the assignment to ensure a path from its
1241    // predecessor rather than headBlk has valid value in initReg if
1242    // (initVal != 1).
1243    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1244  }
1245  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1246
1247  if (migrateFalse) {
1248    migrateInstruction(falseBlk, landBlk, insertPos);
1249    // need to uncondionally insert the assignment to ensure a path from its
1250    // predecessor rather than headBlk has valid value in initReg if
1251    // (initVal != 0)
1252    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1253  }
1254
1255  if (landBlkHasOtherPred) {
1256    // add endif
1257    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1258
1259    // put initReg = 2 to other predecessors of landBlk
1260    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1261         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1262         ++predIter) {
1263      BlockT *curBlk = *predIter;
1264      if (curBlk != trueBlk && curBlk != falseBlk) {
1265        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1266      }
1267    } //for
1268  }
1269  if (DEBUGME) {
1270    errs() << "result from improveSimpleJumpintoIf: ";
1271    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1272  }
1273
1274  // update landBlk
1275  *plandBlk = landBlk;
1276
1277  return numNewBlk;
1278} //improveSimpleJumpintoIf
1279
1280template<class PassT>
1281void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1282                                              LoopT *exitingLoop,
1283                                             BlockT *exitBlk,
1284                                              LoopT *exitLoop,
1285                                             BlockT *landBlk) {
1286  if (DEBUGME) {
1287    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1288           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1289  }
1290  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1291
1292  RegiT initReg = INVALIDREGNUM;
1293  if (exitingLoop != exitLoop) {
1294    initReg = static_cast<int>
1295      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1296    assert(initReg != INVALIDREGNUM);
1297    addLoopBreakInitReg(exitLoop, initReg);
1298    while (exitingLoop != exitLoop && exitingLoop) {
1299      addLoopBreakOnReg(exitingLoop, initReg);
1300      exitingLoop = exitingLoop->getParentLoop();
1301    }
1302    assert(exitingLoop == exitLoop);
1303  }
1304
1305  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1306
1307} //handleLoopbreak
1308
1309template<class PassT>
1310void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1311                                                  LoopT *contingLoop,
1312                                                 BlockT *contBlk,
1313                                                  LoopT *contLoop) {
1314  if (DEBUGME) {
1315    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1316           << " header = BB" << contBlk->getNumber() << "\n";
1317
1318    errs() << "Trying to continue loop-depth = "
1319           << getLoopDepth(contLoop)
1320           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1321  }
1322
1323  RegiT initReg = INVALIDREGNUM;
1324  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1325  if (contingLoop != contLoop) {
1326    initReg = static_cast<int>
1327      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1328    assert(initReg != INVALIDREGNUM);
1329    addLoopContInitReg(contLoop, initReg);
1330    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1331      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
1332      contingLoop = contingLoop->getParentLoop();
1333    }
1334    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1335    addLoopContOnReg(contingLoop, initReg);
1336  }
1337
1338  settleLoopcontBlock(contingBlk, contBlk, initReg);
1339} //handleLoopcontBlock
1340
1341template<class PassT>
1342void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1343  if (DEBUGME) {
1344    errs() << "serialPattern BB" << dstBlk->getNumber()
1345           << " <= BB" << srcBlk->getNumber() << "\n";
1346  }
1347  dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
1348
1349  dstBlk->removeSuccessor(srcBlk);
1350  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1351
1352  removeSuccessor(srcBlk);
1353  retireBlock(dstBlk, srcBlk);
1354} //mergeSerialBlock
1355
1356template<class PassT>
1357void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1358                                                  BlockT *curBlk,
1359                                                  BlockT *trueBlk,
1360                                                  BlockT *falseBlk,
1361                                                  BlockT *landBlk) {
1362  if (DEBUGME) {
1363    errs() << "ifPattern BB" << curBlk->getNumber();
1364    errs() << "{  ";
1365    if (trueBlk) {
1366      errs() << "BB" << trueBlk->getNumber();
1367    }
1368    errs() << "  } else ";
1369    errs() << "{  ";
1370    if (falseBlk) {
1371      errs() << "BB" << falseBlk->getNumber();
1372    }
1373    errs() << "  }\n ";
1374    errs() << "landBlock: ";
1375    if (landBlk == NULL) {
1376      errs() << "NULL";
1377    } else {
1378      errs() << "BB" << landBlk->getNumber();
1379    }
1380    errs() << "\n";
1381  }
1382
1383  int oldOpcode = branchInstr->getOpcode();
1384  DebugLoc branchDL = branchInstr->getDebugLoc();
1385
1386//    transform to
1387//    if cond
1388//       trueBlk
1389//    else
1390//       falseBlk
1391//    endif
1392//    landBlk
1393
1394  typename BlockT::iterator branchInstrPos =
1395    CFGTraits::getInstrPos(curBlk, branchInstr);
1396  CFGTraits::insertCondBranchBefore(branchInstrPos,
1397                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
1398                                    passRep,
1399                                    branchDL);
1400
1401  if (trueBlk) {
1402    curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
1403    curBlk->removeSuccessor(trueBlk);
1404    if (landBlk && trueBlk->succ_size()!=0) {
1405      trueBlk->removeSuccessor(landBlk);
1406    }
1407    retireBlock(curBlk, trueBlk);
1408  }
1409  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1410
1411  if (falseBlk) {
1412    curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
1413                   falseBlk->end());
1414    curBlk->removeSuccessor(falseBlk);
1415    if (landBlk && falseBlk->succ_size() != 0) {
1416      falseBlk->removeSuccessor(landBlk);
1417    }
1418    retireBlock(curBlk, falseBlk);
1419  }
1420  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1421
1422  branchInstr->eraseFromParent();
1423
1424  if (landBlk && trueBlk && falseBlk) {
1425    curBlk->addSuccessor(landBlk);
1426  }
1427
1428} //mergeIfthenelseBlock
1429
1430template<class PassT>
1431void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1432                                                LoopLandInfo *loopLand) {
1433  BlockT *landBlk = loopLand->landBlk;
1434
1435  if (DEBUGME) {
1436    errs() << "loopPattern header = BB" << dstBlk->getNumber()
1437           << " land = BB" << landBlk->getNumber() << "\n";
1438  }
1439
1440  // Loop contInitRegs are init at the beginning of the loop.
1441  for (typename std::set<RegiT>::const_iterator iter =
1442         loopLand->contInitRegs.begin(),
1443       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1444    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1445  }
1446
1447  /* we last inserterd the DebugLoc in the
1448   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1449   * search for the DebugLoc in the that statement.
1450   * if not found, we have to insert the empty/default DebugLoc */
1451  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1452  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1453
1454  CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1455  // Loop breakInitRegs are init before entering the loop.
1456  for (typename std::set<RegiT>::const_iterator iter =
1457         loopLand->breakInitRegs.begin(),
1458       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
1459    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1460  }
1461  // Loop endbranchInitRegs are init before entering the loop.
1462  for (typename std::set<RegiT>::const_iterator iter =
1463         loopLand->endbranchInitRegs.begin(),
1464       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1465    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1466  }
1467
1468  /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1469   * search for the DebugLoc in the continue statement.
1470   * if not found, we have to insert the empty/default DebugLoc */
1471  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1472  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1473
1474  CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1475  // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1476  // loop.
1477  for (typename std::set<RegiT>::const_iterator iter =
1478         loopLand->breakOnRegs.begin(),
1479       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1480    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
1481                                   *iter);
1482  }
1483
1484  // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1485  // loop.
1486  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1487       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1488    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1489                                   passRep, *iter);
1490  }
1491
1492  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1493
1494  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1495       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1496    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
1497  }
1498
1499  removeSuccessor(landBlk);
1500  retireBlock(dstBlk, landBlk);
1501} //mergeLooplandBlock
1502
1503template<class PassT>
1504void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
1505  while (I--) {
1506    if (I->getOpcode() == AMDGPU::PRED_X) {
1507      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1508      case OPCODE_IS_ZERO_INT:
1509        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1510        return;
1511      case OPCODE_IS_NOT_ZERO_INT:
1512        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1513        return;
1514      case OPCODE_IS_ZERO:
1515        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1516        return;
1517      case OPCODE_IS_NOT_ZERO:
1518        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1519        return;
1520      default:
1521        assert(0 && "PRED_X Opcode invalid!");
1522      }
1523    }
1524  }
1525}
1526
1527template<class PassT>
1528void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1529                                                 BlockT *exitBlk,
1530                                                 BlockT *exitLandBlk,
1531                                                 RegiT  setReg) {
1532  if (DEBUGME) {
1533    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1534           << " exit = BB" << exitBlk->getNumber()
1535           << " land = BB" << exitLandBlk->getNumber() << "\n";
1536  }
1537
1538  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1539  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1540
1541  DebugLoc DL = branchInstr->getDebugLoc();
1542
1543  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1544
1545  //    transform exitingBlk to
1546  //    if ( ) {
1547  //       exitBlk (if exitBlk != exitLandBlk)
1548  //       setReg = 1
1549  //       break
1550  //    }endif
1551  //    successor = {orgSuccessor(exitingBlk) - exitBlk}
1552
1553  typename BlockT::iterator branchInstrPos =
1554    CFGTraits::getInstrPos(exitingBlk, branchInstr);
1555
1556  if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1557    //break_logical
1558
1559    if (trueBranch != exitBlk) {
1560      reversePredicateSetter(branchInstrPos);
1561    }
1562    CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1563  } else {
1564    if (trueBranch != exitBlk) {
1565      reversePredicateSetter(branchInstr);
1566    }
1567    CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
1568    if (exitBlk != exitLandBlk) {
1569      //splice is insert-before ...
1570      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1571                         exitBlk->end());
1572    }
1573    if (setReg != INVALIDREGNUM) {
1574      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1575    }
1576    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1577  } //if_logical
1578
1579  //now branchInst can be erase safely
1580  branchInstr->eraseFromParent();
1581
1582  //now take care of successors, retire blocks
1583  exitingBlk->removeSuccessor(exitBlk);
1584  if (exitBlk != exitLandBlk) {
1585    //splice is insert-before ...
1586    exitBlk->removeSuccessor(exitLandBlk);
1587    retireBlock(exitingBlk, exitBlk);
1588  }
1589
1590} //mergeLoopbreakBlock
1591
1592template<class PassT>
1593void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1594                                                 BlockT *contBlk,
1595                                                 RegiT   setReg) {
1596  if (DEBUGME) {
1597    errs() << "settleLoopcontBlock conting = BB"
1598           << contingBlk->getNumber()
1599           << ", cont = BB" << contBlk->getNumber() << "\n";
1600  }
1601
1602  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1603  if (branchInstr) {
1604    assert(CFGTraits::isCondBranch(branchInstr));
1605    typename BlockT::iterator branchInstrPos =
1606      CFGTraits::getInstrPos(contingBlk, branchInstr);
1607    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1608    int oldOpcode = branchInstr->getOpcode();
1609    DebugLoc DL = branchInstr->getDebugLoc();
1610
1611    //    transform contingBlk to
1612    //     if () {
1613    //          move instr after branchInstr
1614    //          continue
1615    //        or
1616    //          setReg = 1
1617    //          break
1618    //     }endif
1619    //     successor = {orgSuccessor(contingBlk) - loopHeader}
1620
1621    bool useContinueLogical =
1622      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1623
1624    if (useContinueLogical == false) {
1625      int branchOpcode =
1626        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1627                              : CFGTraits::getBranchZeroOpcode(oldOpcode);
1628
1629      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1630
1631      if (setReg != INVALIDREGNUM) {
1632        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1633        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1634        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1635      } else {
1636        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1637        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1638      }
1639
1640      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1641    } else {
1642      int branchOpcode =
1643        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1644                              : CFGTraits::getContinueZeroOpcode(oldOpcode);
1645
1646      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1647    }
1648
1649    branchInstr->eraseFromParent();
1650  } else {
1651    // if we've arrived here then we've already erased the branch instruction
1652    // travel back up the basic block to see the last reference of our debug location
1653    // we've just inserted that reference here so it should be representative
1654    if (setReg != INVALIDREGNUM) {
1655      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1656      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1657      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1658    } else {
1659      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1660      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1661    }
1662  } //else
1663
1664} //settleLoopcontBlock
1665
1666// BBs in exitBlkSet are determined as in break-path for loopRep,
1667// before we can put code for BBs as inside loop-body for loopRep
1668// check whether those BBs are determined as cont-BB for parentLoopRep
1669// earlier.
1670// If so, generate a new BB newBlk
1671//    (1) set newBlk common successor of BBs in exitBlkSet
1672//    (2) change the continue-instr in BBs in exitBlkSet to break-instr
1673//    (3) generate continue-instr in newBlk
1674//
1675template<class PassT>
1676typename CFGStructurizer<PassT>::BlockT *
1677CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1678                                              LoopT *loopRep,
1679                                              std::set<BlockT *> &exitBlkSet,
1680                                              BlockT *exitLandBlk) {
1681  std::set<BlockT *> endBlkSet;
1682
1683
1684
1685  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1686       iterEnd = exitBlkSet.end();
1687       iter != iterEnd; ++iter) {
1688    BlockT *exitBlk = *iter;
1689    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1690
1691    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1692      return NULL;
1693
1694    endBlkSet.insert(endBlk);
1695  }
1696
1697  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1698  funcRep->push_back(newBlk);  //insert to function
1699  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1700  SHOWNEWBLK(newBlk, "New continue block: ");
1701
1702  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1703       iterEnd = endBlkSet.end();
1704       iter != iterEnd; ++iter) {
1705      BlockT *endBlk = *iter;
1706      InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1707      if (contInstr) {
1708        contInstr->eraseFromParent();
1709      }
1710      endBlk->addSuccessor(newBlk);
1711      if (DEBUGME) {
1712        errs() << "Add new continue Block to BB"
1713               << endBlk->getNumber() << " successors\n";
1714      }
1715  }
1716
1717  return newBlk;
1718} //relocateLoopcontBlock
1719
1720
1721// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1722// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1723// pathes corresponding to the loop exiting branches.
1724
1725template<class PassT>
1726typename CFGStructurizer<PassT>::BlockT *
1727CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1728                                              BlockTSmallerVector &exitingBlks,
1729                                              BlockTSmallerVector &exitBlks) {
1730  const AMDGPUInstrInfo *tii =
1731             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1732  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1733
1734  RegiT endBranchReg = static_cast<int>
1735    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1736  assert(endBranchReg >= 0);
1737
1738  // reg = 0 before entering the loop
1739  addLoopEndbranchInitReg(loopRep, endBranchReg);
1740
1741  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1742  assert(numBlks >=2 && numBlks == exitBlks.size());
1743
1744  BlockT *preExitingBlk = exitingBlks[0];
1745  BlockT *preExitBlk = exitBlks[0];
1746  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1747  funcRep->push_back(preBranchBlk);  //insert to function
1748  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1749
1750  BlockT *newLandBlk = preBranchBlk;
1751
1752      CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1753        newLandBlk);
1754  preExitingBlk->removeSuccessor(preExitBlk);
1755  preExitingBlk->addSuccessor(newLandBlk);
1756
1757  //it is redundant to add reg = 0 to exitingBlks[0]
1758
1759  // For 1..n th exiting path (the last iteration handles two pathes) create the
1760  // branch to the previous path and the current path.
1761  for (uint32_t i = 1; i < numBlks; ++i) {
1762    BlockT *curExitingBlk = exitingBlks[i];
1763    BlockT *curExitBlk = exitBlks[i];
1764    BlockT *curBranchBlk;
1765
1766    if (i == numBlks - 1) {
1767      curBranchBlk = curExitBlk;
1768    } else {
1769      curBranchBlk = funcRep->CreateMachineBasicBlock();
1770      funcRep->push_back(curBranchBlk);  //insert to function
1771      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1772    }
1773
1774    // Add reg = i to exitingBlks[i].
1775    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1776                                       endBranchReg, i);
1777
1778    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1779    // (exitingBlks[i], newLandBlk).
1780    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1781                                          newLandBlk);
1782    curExitingBlk->removeSuccessor(curExitBlk);
1783    curExitingBlk->addSuccessor(newLandBlk);
1784
1785    // add to preBranchBlk the branch instruction:
1786    // if (endBranchReg == preVal)
1787    //    preExitBlk
1788    // else
1789    //    curBranchBlk
1790    //
1791    // preValReg = i - 1
1792
1793  DebugLoc DL;
1794  RegiT preValReg = static_cast<int>
1795    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1796
1797  preBranchBlk->insert(preBranchBlk->begin(),
1798                       tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1799                       i - 1));
1800
1801  // condResReg = (endBranchReg == preValReg)
1802    RegiT condResReg = static_cast<int>
1803      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1804    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1805      .addReg(endBranchReg).addReg(preValReg);
1806
1807    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1808      .addMBB(preExitBlk).addReg(condResReg);
1809
1810    preBranchBlk->addSuccessor(preExitBlk);
1811    preBranchBlk->addSuccessor(curBranchBlk);
1812
1813    // Update preExitingBlk, preExitBlk, preBranchBlk.
1814    preExitingBlk = curExitingBlk;
1815    preExitBlk = curExitBlk;
1816    preBranchBlk = curBranchBlk;
1817
1818  }  //end for 1 .. n blocks
1819
1820  return newLandBlk;
1821} //addLoopEndbranchBlock
1822
1823template<class PassT>
1824typename CFGStructurizer<PassT>::PathToKind
1825CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1826                                     bool allowSideEntry) {
1827  assert(dstBlk);
1828
1829  if (srcBlk == dstBlk) {
1830    return SinglePath_InPath;
1831  }
1832
1833  while (srcBlk && srcBlk->succ_size() == 1) {
1834    srcBlk = *srcBlk->succ_begin();
1835    if (srcBlk == dstBlk) {
1836      return SinglePath_InPath;
1837    }
1838
1839    if (!allowSideEntry && srcBlk->pred_size() > 1) {
1840      return Not_SinglePath;
1841    }
1842  }
1843
1844  if (srcBlk && srcBlk->succ_size()==0) {
1845    return SinglePath_NotInPath;
1846  }
1847
1848  return Not_SinglePath;
1849} //singlePathTo
1850
1851// If there is a single path from srcBlk to dstBlk, return the last block before
1852// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1853// last block in the path Otherwise, return NULL
1854template<class PassT>
1855typename CFGStructurizer<PassT>::BlockT *
1856CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
1857                                      bool allowSideEntry) {
1858  assert(dstBlk);
1859
1860  if (srcBlk == dstBlk) {
1861    return srcBlk;
1862  }
1863
1864  if (srcBlk->succ_size() == 0) {
1865    return srcBlk;
1866  }
1867
1868  while (srcBlk && srcBlk->succ_size() == 1) {
1869    BlockT *preBlk = srcBlk;
1870
1871    srcBlk = *srcBlk->succ_begin();
1872    if (srcBlk == NULL) {
1873      return preBlk;
1874    }
1875
1876    if (!allowSideEntry && srcBlk->pred_size() > 1) {
1877      return NULL;
1878    }
1879  }
1880
1881  if (srcBlk && srcBlk->succ_size()==0) {
1882    return srcBlk;
1883  }
1884
1885  return NULL;
1886
1887} //singlePathEnd
1888
1889template<class PassT>
1890int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
1891                                               BlockT *dstBlk) {
1892  int cloned = 0;
1893  assert(preBlk->isSuccessor(srcBlk));
1894  while (srcBlk && srcBlk != dstBlk) {
1895    assert(srcBlk->succ_size() == 1);
1896    if (srcBlk->pred_size() > 1) {
1897      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
1898      ++cloned;
1899    }
1900
1901    preBlk = srcBlk;
1902    srcBlk = *srcBlk->succ_begin();
1903  }
1904
1905  return cloned;
1906} //cloneOnSideEntryTo
1907
1908template<class PassT>
1909typename CFGStructurizer<PassT>::BlockT *
1910CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
1911                                                 BlockT *predBlk) {
1912  assert(predBlk->isSuccessor(curBlk) &&
1913         "succBlk is not a prececessor of curBlk");
1914
1915  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
1916  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
1917  //srcBlk, oldBlk, newBlk
1918
1919  predBlk->removeSuccessor(curBlk);
1920  predBlk->addSuccessor(cloneBlk);
1921
1922  // add all successor to cloneBlk
1923  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
1924
1925  numClonedInstr += curBlk->size();
1926
1927  if (DEBUGME) {
1928    errs() << "Cloned block: " << "BB"
1929           << curBlk->getNumber() << "size " << curBlk->size() << "\n";
1930  }
1931
1932  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
1933
1934  return cloneBlk;
1935} //cloneBlockForPredecessor
1936
1937template<class PassT>
1938typename CFGStructurizer<PassT>::BlockT *
1939CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
1940                                               BlockT *exitingBlk) {
1941  BlockT *exitBlk = NULL;
1942
1943  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
1944       iterSuccEnd = exitingBlk->succ_end();
1945       iterSucc != iterSuccEnd; ++iterSucc) {
1946    BlockT *curBlk = *iterSucc;
1947    if (!loopRep->contains(curBlk)) {
1948      assert(exitBlk == NULL);
1949      exitBlk = curBlk;
1950    }
1951  }
1952
1953  assert(exitBlk != NULL);
1954
1955  return exitBlk;
1956} //exitingBlock2ExitBlock
1957
1958template<class PassT>
1959void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
1960                                                BlockT *dstBlk,
1961                                                InstrIterator insertPos) {
1962  InstrIterator spliceEnd;
1963  //look for the input branchinstr, not the AMDGPU branchinstr
1964  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
1965  if (branchInstr == NULL) {
1966    if (DEBUGME) {
1967      errs() << "migrateInstruction don't see branch instr\n" ;
1968    }
1969    spliceEnd = srcBlk->end();
1970  } else {
1971    if (DEBUGME) {
1972      errs() << "migrateInstruction see branch instr\n" ;
1973      branchInstr->dump();
1974    }
1975    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
1976  }
1977  if (DEBUGME) {
1978    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
1979      << "srcSize = " << srcBlk->size() << "\n";
1980  }
1981
1982  //splice insert before insertPos
1983  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
1984
1985  if (DEBUGME) {
1986    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
1987      << "srcSize = " << srcBlk->size() << "\n";
1988  }
1989} //migrateInstruction
1990
1991// normalizeInfiniteLoopExit change
1992//   B1:
1993//        uncond_br LoopHeader
1994//
1995// to
1996//   B1:
1997//        cond_br 1 LoopHeader dummyExit
1998// and return the newly added dummy exit block
1999//
2000template<class PassT>
2001typename CFGStructurizer<PassT>::BlockT *
2002CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2003  BlockT *loopHeader;
2004  BlockT *loopLatch;
2005  loopHeader = LoopRep->getHeader();
2006  loopLatch = LoopRep->getLoopLatch();
2007  BlockT *dummyExitBlk = NULL;
2008  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2009  if (loopHeader!=NULL && loopLatch!=NULL) {
2010    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2011    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2012      dummyExitBlk = funcRep->CreateMachineBasicBlock();
2013      funcRep->push_back(dummyExitBlk);  //insert to function
2014      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2015
2016      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2017
2018      typename BlockT::iterator insertPos =
2019        CFGTraits::getInstrPos(loopLatch, branchInstr);
2020      unsigned immReg =
2021        funcRep->getRegInfo().createVirtualRegister(I32RC);
2022      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2023      InstrT *newInstr =
2024        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2025      MachineInstrBuilder MIB(*funcRep, newInstr);
2026      MIB.addMBB(loopHeader);
2027      MIB.addReg(immReg, false);
2028
2029      SHOWNEWINSTR(newInstr);
2030
2031      branchInstr->eraseFromParent();
2032      loopLatch->addSuccessor(dummyExitBlk);
2033    }
2034  }
2035
2036  return dummyExitBlk;
2037} //normalizeInfiniteLoopExit
2038
2039template<class PassT>
2040void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2041  InstrT *branchInstr;
2042
2043  // I saw two unconditional branch in one basic block in example
2044  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2045  while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2046          && CFGTraits::isUncondBranch(branchInstr)) {
2047    if (DEBUGME) {
2048          errs() << "Removing unconditional branch instruction" ;
2049      branchInstr->dump();
2050    }
2051    branchInstr->eraseFromParent();
2052  }
2053} //removeUnconditionalBranch
2054
2055template<class PassT>
2056void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2057  if (srcBlk->succ_size() == 2) {
2058    BlockT *blk1 = *srcBlk->succ_begin();
2059    BlockT *blk2 = *(++srcBlk->succ_begin());
2060
2061    if (blk1 == blk2) {
2062      InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2063      assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2064      if (DEBUGME) {
2065        errs() << "Removing unneeded conditional branch instruction" ;
2066        branchInstr->dump();
2067      }
2068      branchInstr->eraseFromParent();
2069      SHOWNEWBLK(blk1, "Removing redundant successor");
2070      srcBlk->removeSuccessor(blk1);
2071    }
2072  }
2073} //removeRedundantConditionalBranch
2074
2075template<class PassT>
2076void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2077                                               DEFAULT_VEC_SLOTS> &retBlks) {
2078  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2079  funcRep->push_back(dummyExitBlk);  //insert to function
2080  CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2081
2082  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2083         retBlks.begin(),
2084       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2085    BlockT *curBlk = *iter;
2086    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2087    if (curInstr) {
2088      curInstr->eraseFromParent();
2089    }
2090    curBlk->addSuccessor(dummyExitBlk);
2091    if (DEBUGME) {
2092      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2093             << " successors\n";
2094    }
2095  } //for
2096
2097  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2098} //addDummyExitBlock
2099
2100template<class PassT>
2101void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2102  while (srcBlk->succ_size()) {
2103    srcBlk->removeSuccessor(*srcBlk->succ_begin());
2104  }
2105}
2106
2107template<class PassT>
2108void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2109  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2110
2111  if (srcBlkInfo == NULL) {
2112    srcBlkInfo = new BlockInfo();
2113  }
2114
2115  srcBlkInfo->sccNum = sccNum;
2116}
2117
2118template<class PassT>
2119int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2120  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2121  return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2122}
2123
2124template<class PassT>
2125void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2126  if (DEBUGME) {
2127        errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2128  }
2129
2130  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2131
2132  if (srcBlkInfo == NULL) {
2133    srcBlkInfo = new BlockInfo();
2134  }
2135
2136  srcBlkInfo->isRetired = true;
2137  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2138         && "can't retire block yet");
2139}
2140
2141template<class PassT>
2142bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2143  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2144  return (srcBlkInfo && srcBlkInfo->isRetired);
2145}
2146
2147template<class PassT>
2148bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2149  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2150  while (loopRep && loopRep->getHeader() == curBlk) {
2151    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2152
2153    if(loopLand == NULL)
2154      return true;
2155
2156    BlockT *landBlk = loopLand->landBlk;
2157    assert(landBlk);
2158    if (!isRetiredBlock(landBlk)) {
2159      return true;
2160    }
2161
2162    loopRep = loopRep->getParentLoop();
2163  }
2164
2165  return false;
2166} //isActiveLoophead
2167
2168template<class PassT>
2169bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2170  const unsigned blockSizeThreshold = 30;
2171  const unsigned cloneInstrThreshold = 100;
2172
2173  bool multiplePreds = blk && (blk->pred_size() > 1);
2174
2175  if(!multiplePreds)
2176    return false;
2177
2178  unsigned blkSize = blk->size();
2179  return ((blkSize > blockSizeThreshold)
2180          && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2181} //needMigrateBlock
2182
2183template<class PassT>
2184typename CFGStructurizer<PassT>::BlockT *
2185CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2186                                            BlockTSmallerVector &exitBlks,
2187                                            std::set<BlockT *> &exitBlkSet) {
2188  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
2189
2190  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2191       predIterEnd = landBlk->pred_end();
2192       predIter != predIterEnd; ++predIter) {
2193    BlockT *curBlk = *predIter;
2194    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2195      inpathBlks.push_back(curBlk);
2196    }
2197  } //for
2198
2199  //if landBlk has predecessors that are not in the given loop,
2200  //create a new block
2201  BlockT *newLandBlk = landBlk;
2202  if (inpathBlks.size() != landBlk->pred_size()) {
2203    newLandBlk = funcRep->CreateMachineBasicBlock();
2204    funcRep->push_back(newLandBlk);  //insert to function
2205    newLandBlk->addSuccessor(landBlk);
2206    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2207         inpathBlks.begin(),
2208         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2209      BlockT *curBlk = *iter;
2210      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2211      //srcBlk, oldBlk, newBlk
2212      curBlk->removeSuccessor(landBlk);
2213      curBlk->addSuccessor(newLandBlk);
2214    }
2215    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2216      if (exitBlks[i] == landBlk) {
2217        exitBlks[i] = newLandBlk;
2218      }
2219    }
2220    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2221  }
2222
2223  setLoopLandBlock(loopRep, newLandBlk);
2224
2225  return newLandBlk;
2226} // recordLoopbreakLand
2227
2228template<class PassT>
2229void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2230  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2231
2232  if (theEntry == NULL) {
2233    theEntry = new LoopLandInfo();
2234  }
2235  assert(theEntry->landBlk == NULL);
2236
2237  if (blk == NULL) {
2238    blk = funcRep->CreateMachineBasicBlock();
2239    funcRep->push_back(blk);  //insert to function
2240    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2241  }
2242
2243  theEntry->landBlk = blk;
2244
2245  if (DEBUGME) {
2246    errs() << "setLoopLandBlock loop-header = BB"
2247           << loopRep->getHeader()->getNumber()
2248           << "  landing-block = BB" << blk->getNumber() << "\n";
2249  }
2250} // setLoopLandBlock
2251
2252template<class PassT>
2253void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2254  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2255
2256  if (theEntry == NULL) {
2257    theEntry = new LoopLandInfo();
2258  }
2259
2260  theEntry->breakOnRegs.insert(regNum);
2261
2262  if (DEBUGME) {
2263    errs() << "addLoopBreakOnReg loop-header = BB"
2264           << loopRep->getHeader()->getNumber()
2265           << "  regNum = " << regNum << "\n";
2266  }
2267} // addLoopBreakOnReg
2268
2269template<class PassT>
2270void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2271  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2272
2273  if (theEntry == NULL) {
2274    theEntry = new LoopLandInfo();
2275  }
2276  theEntry->contOnRegs.insert(regNum);
2277
2278  if (DEBUGME) {
2279    errs() << "addLoopContOnReg loop-header = BB"
2280           << loopRep->getHeader()->getNumber()
2281           << "  regNum = " << regNum << "\n";
2282  }
2283} // addLoopContOnReg
2284
2285template<class PassT>
2286void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2287  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2288
2289  if (theEntry == NULL) {
2290    theEntry = new LoopLandInfo();
2291  }
2292  theEntry->breakInitRegs.insert(regNum);
2293
2294  if (DEBUGME) {
2295    errs() << "addLoopBreakInitReg loop-header = BB"
2296           << loopRep->getHeader()->getNumber()
2297           << "  regNum = " << regNum << "\n";
2298  }
2299} // addLoopBreakInitReg
2300
2301template<class PassT>
2302void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2303  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2304
2305  if (theEntry == NULL) {
2306    theEntry = new LoopLandInfo();
2307  }
2308  theEntry->contInitRegs.insert(regNum);
2309
2310  if (DEBUGME) {
2311    errs() << "addLoopContInitReg loop-header = BB"
2312           << loopRep->getHeader()->getNumber()
2313           << "  regNum = " << regNum << "\n";
2314  }
2315} // addLoopContInitReg
2316
2317template<class PassT>
2318void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2319                                                     RegiT regNum) {
2320  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2321
2322  if (theEntry == NULL) {
2323    theEntry = new LoopLandInfo();
2324  }
2325  theEntry->endbranchInitRegs.insert(regNum);
2326
2327  if (DEBUGME) {
2328        errs() << "addLoopEndbranchInitReg loop-header = BB"
2329      << loopRep->getHeader()->getNumber()
2330      << "  regNum = " << regNum << "\n";
2331  }
2332} // addLoopEndbranchInitReg
2333
2334template<class PassT>
2335typename CFGStructurizer<PassT>::LoopLandInfo *
2336CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2337  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2338
2339  return theEntry;
2340} // getLoopLandInfo
2341
2342template<class PassT>
2343typename CFGStructurizer<PassT>::BlockT *
2344CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2345  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2346
2347  return theEntry ? theEntry->landBlk : NULL;
2348} // getLoopLandBlock
2349
2350
2351template<class PassT>
2352bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2353  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2354  if (loopRep == NULL)
2355    return false;
2356
2357  BlockT *loopHeader = loopRep->getHeader();
2358
2359  return curBlk->isSuccessor(loopHeader);
2360
2361} //hasBackEdge
2362
2363template<class PassT>
2364unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2365  return loopRep ? loopRep->getLoopDepth() : 0;
2366} //getLoopDepth
2367
2368template<class PassT>
2369int CFGStructurizer<PassT>::countActiveBlock
2370(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2371 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2372  int count = 0;
2373  while (iterStart != iterEnd) {
2374    if (!isRetiredBlock(*iterStart)) {
2375      ++count;
2376    }
2377    ++iterStart;
2378  }
2379
2380  return count;
2381} //countActiveBlock
2382
2383// This is work around solution for findNearestCommonDominator not avaiable to
2384// post dom a proper fix should go to Dominators.h.
2385
2386template<class PassT>
2387typename CFGStructurizer<PassT>::BlockT*
2388CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2389
2390  if (postDomTree->dominates(blk1, blk2)) {
2391    return blk1;
2392  }
2393  if (postDomTree->dominates(blk2, blk1)) {
2394    return blk2;
2395  }
2396
2397  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2398  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2399
2400  // Handle newly cloned node.
2401  if (node1 == NULL && blk1->succ_size() == 1) {
2402    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2403  }
2404  if (node2 == NULL && blk2->succ_size() == 1) {
2405    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2406  }
2407
2408  if (node1 == NULL || node2 == NULL) {
2409    return NULL;
2410  }
2411
2412  node1 = node1->getIDom();
2413  while (node1) {
2414    if (postDomTree->dominates(node1, node2)) {
2415      return node1->getBlock();
2416    }
2417    node1 = node1->getIDom();
2418  }
2419
2420  return NULL;
2421}
2422
2423template<class PassT>
2424typename CFGStructurizer<PassT>::BlockT *
2425CFGStructurizer<PassT>::findNearestCommonPostDom
2426(typename std::set<BlockT *> &blks) {
2427  BlockT *commonDom;
2428  typename std::set<BlockT *>::const_iterator iter = blks.begin();
2429  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2430  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2431    BlockT *curBlk = *iter;
2432    if (curBlk != commonDom) {
2433      commonDom = findNearestCommonPostDom(curBlk, commonDom);
2434    }
2435  }
2436
2437  if (DEBUGME) {
2438    errs() << "Common post dominator for exit blocks is ";
2439    if (commonDom) {
2440          errs() << "BB" << commonDom->getNumber() << "\n";
2441    } else {
2442      errs() << "NULL\n";
2443    }
2444  }
2445
2446  return commonDom;
2447} //findNearestCommonPostDom
2448
2449} //end namespace llvm
2450
2451//todo: move-end
2452
2453
2454//===----------------------------------------------------------------------===//
2455//
2456// CFGStructurizer for AMDGPU
2457//
2458//===----------------------------------------------------------------------===//
2459
2460
2461using namespace llvmCFGStruct;
2462
2463namespace llvm {
2464class AMDGPUCFGStructurizer : public MachineFunctionPass {
2465public:
2466  typedef MachineInstr              InstructionType;
2467  typedef MachineFunction           FunctionType;
2468  typedef MachineBasicBlock         BlockType;
2469  typedef MachineLoopInfo           LoopinfoType;
2470  typedef MachineDominatorTree      DominatortreeType;
2471  typedef MachinePostDominatorTree  PostDominatortreeType;
2472  typedef MachineDomTreeNode        DomTreeNodeType;
2473  typedef MachineLoop               LoopType;
2474
2475protected:
2476  TargetMachine &TM;
2477  const TargetInstrInfo *TII;
2478  const AMDGPURegisterInfo *TRI;
2479
2480public:
2481  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2482  const TargetInstrInfo *getTargetInstrInfo() const;
2483
2484private:
2485
2486};
2487
2488} //end of namespace llvm
2489AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
2490: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2491  TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
2492}
2493
2494const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2495  return TII;
2496}
2497//===----------------------------------------------------------------------===//
2498//
2499// CFGPrepare
2500//
2501//===----------------------------------------------------------------------===//
2502
2503
2504using namespace llvmCFGStruct;
2505
2506namespace llvm {
2507class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
2508public:
2509  static char ID;
2510
2511public:
2512  AMDGPUCFGPrepare(TargetMachine &tm);
2513
2514  virtual const char *getPassName() const;
2515  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2516
2517  bool runOnMachineFunction(MachineFunction &F);
2518
2519private:
2520
2521};
2522
2523char AMDGPUCFGPrepare::ID = 0;
2524} //end of namespace llvm
2525
2526AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2527  : AMDGPUCFGStructurizer(ID, tm )  {
2528}
2529const char *AMDGPUCFGPrepare::getPassName() const {
2530  return "AMD IL Control Flow Graph Preparation Pass";
2531}
2532
2533void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2534  AU.addPreserved<MachineFunctionAnalysis>();
2535  AU.addRequired<MachineFunctionAnalysis>();
2536  AU.addRequired<MachineDominatorTree>();
2537  AU.addRequired<MachinePostDominatorTree>();
2538  AU.addRequired<MachineLoopInfo>();
2539}
2540
2541//===----------------------------------------------------------------------===//
2542//
2543// CFGPerform
2544//
2545//===----------------------------------------------------------------------===//
2546
2547
2548using namespace llvmCFGStruct;
2549
2550namespace llvm {
2551class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
2552public:
2553  static char ID;
2554
2555public:
2556  AMDGPUCFGPerform(TargetMachine &tm);
2557  virtual const char *getPassName() const;
2558  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2559  bool runOnMachineFunction(MachineFunction &F);
2560
2561private:
2562
2563};
2564
2565char AMDGPUCFGPerform::ID = 0;
2566} //end of namespace llvm
2567
2568  AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2569: AMDGPUCFGStructurizer(ID, tm) {
2570}
2571
2572const char *AMDGPUCFGPerform::getPassName() const {
2573  return "AMD IL Control Flow Graph structurizer Pass";
2574}
2575
2576void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2577  AU.addPreserved<MachineFunctionAnalysis>();
2578  AU.addRequired<MachineFunctionAnalysis>();
2579  AU.addRequired<MachineDominatorTree>();
2580  AU.addRequired<MachinePostDominatorTree>();
2581  AU.addRequired<MachineLoopInfo>();
2582}
2583
2584//===----------------------------------------------------------------------===//
2585//
2586// CFGStructTraits<AMDGPUCFGStructurizer>
2587//
2588//===----------------------------------------------------------------------===//
2589
2590namespace llvmCFGStruct {
2591// this class is tailor to the AMDGPU backend
2592template<>
2593struct CFGStructTraits<AMDGPUCFGStructurizer> {
2594  typedef int RegiT;
2595
2596  static int getBranchNzeroOpcode(int oldOpcode) {
2597    switch(oldOpcode) {
2598    case AMDGPU::JUMP_COND:
2599    case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2600    case AMDGPU::BRANCH_COND_i32:
2601    case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
2602    default:
2603      assert(0 && "internal error");
2604    }
2605    return -1;
2606  }
2607
2608  static int getBranchZeroOpcode(int oldOpcode) {
2609    switch(oldOpcode) {
2610    case AMDGPU::JUMP_COND:
2611    case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
2612    case AMDGPU::BRANCH_COND_i32:
2613    case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
2614    default:
2615      assert(0 && "internal error");
2616    }
2617    return -1;
2618  }
2619
2620  static int getContinueNzeroOpcode(int oldOpcode) {
2621    switch(oldOpcode) {
2622    case AMDGPU::JUMP_COND:
2623    case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2624    default:
2625      assert(0 && "internal error");
2626    };
2627    return -1;
2628  }
2629
2630  static int getContinueZeroOpcode(int oldOpcode) {
2631    switch(oldOpcode) {
2632    case AMDGPU::JUMP_COND:
2633    case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2634    default:
2635      assert(0 && "internal error");
2636    }
2637    return -1;
2638  }
2639
2640  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2641    return instr->getOperand(0).getMBB();
2642  }
2643
2644  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2645    instr->getOperand(0).setMBB(blk);
2646  }
2647
2648  static MachineBasicBlock *
2649  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2650    assert(blk->succ_size() == 2);
2651    MachineBasicBlock *trueBranch = getTrueBranch(instr);
2652    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2653    MachineBasicBlock::succ_iterator iterNext = iter;
2654    ++iterNext;
2655
2656    return (*iter == trueBranch) ? *iterNext : *iter;
2657  }
2658
2659  static bool isCondBranch(MachineInstr *instr) {
2660    switch (instr->getOpcode()) {
2661      case AMDGPU::JUMP_COND:
2662      case AMDGPU::BRANCH_COND_i32:
2663      case AMDGPU::BRANCH_COND_f32:
2664      break;
2665    default:
2666      return false;
2667    }
2668    return true;
2669  }
2670
2671  static bool isUncondBranch(MachineInstr *instr) {
2672    switch (instr->getOpcode()) {
2673    case AMDGPU::JUMP:
2674    case AMDGPU::BRANCH:
2675      return true;
2676    default:
2677      return false;
2678    }
2679    return true;
2680  }
2681
2682  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2683    //get DebugLoc from the first MachineBasicBlock instruction with debug info
2684    DebugLoc DL;
2685    for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2686      MachineInstr *instr = &(*iter);
2687      if (instr->getDebugLoc().isUnknown() == false) {
2688        DL = instr->getDebugLoc();
2689      }
2690    }
2691    return DL;
2692  }
2693
2694  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2695    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2696    MachineInstr *instr = &*iter;
2697    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2698      return instr;
2699    }
2700    return NULL;
2701  }
2702
2703  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2704  //
2705  // BB with backward-edge could have move instructions after the branch
2706  // instruction.  Such move instruction "belong to" the loop backward-edge.
2707  //
2708  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2709    const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2710                                  blk->getParent()->getTarget().getInstrInfo());
2711
2712    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2713         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2714      // FIXME: Simplify
2715      MachineInstr *instr = &*iter;
2716      if (instr) {
2717        if (isCondBranch(instr) || isUncondBranch(instr)) {
2718          return instr;
2719        } else if (!TII->isMov(instr->getOpcode())) {
2720          break;
2721        }
2722      }
2723    }
2724    return NULL;
2725  }
2726
2727  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2728    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2729    if (iter != blk->rend()) {
2730      MachineInstr *instr = &(*iter);
2731      if (instr->getOpcode() == AMDGPU::RETURN) {
2732        return instr;
2733      }
2734    }
2735    return NULL;
2736  }
2737
2738  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2739    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2740    if (iter != blk->rend()) {
2741      MachineInstr *instr = &(*iter);
2742      if (instr->getOpcode() == AMDGPU::CONTINUE) {
2743        return instr;
2744      }
2745    }
2746    return NULL;
2747  }
2748
2749  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2750    for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2751      MachineInstr *instr = &(*iter);
2752      if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
2753        return instr;
2754      }
2755    }
2756    return NULL;
2757  }
2758
2759  static bool isReturnBlock(MachineBasicBlock *blk) {
2760    MachineInstr *instr = getReturnInstr(blk);
2761    bool isReturn = (blk->succ_size() == 0);
2762    if (instr) {
2763      assert(isReturn);
2764    } else if (isReturn) {
2765      if (DEBUGME) {
2766        errs() << "BB" << blk->getNumber()
2767               <<" is return block without RETURN instr\n";
2768      }
2769    }
2770
2771    return  isReturn;
2772  }
2773
2774  static MachineBasicBlock::iterator
2775  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2776    assert(instr->getParent() == blk && "instruction doesn't belong to block");
2777    MachineBasicBlock::iterator iter = blk->begin();
2778    MachineBasicBlock::iterator iterEnd = blk->end();
2779    while (&(*iter) != instr && iter != iterEnd) {
2780      ++iter;
2781    }
2782
2783    assert(iter != iterEnd);
2784    return iter;
2785  }//getInstrPos
2786
2787  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2788                                         AMDGPUCFGStructurizer *passRep) {
2789    return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2790  } //insertInstrBefore
2791
2792  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2793                                         AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2794    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2795    MachineInstr *newInstr =
2796      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2797
2798    MachineBasicBlock::iterator res;
2799    if (blk->begin() != blk->end()) {
2800      blk->insert(blk->begin(), newInstr);
2801    } else {
2802      blk->push_back(newInstr);
2803    }
2804
2805    SHOWNEWINSTR(newInstr);
2806
2807    return newInstr;
2808  } //insertInstrBefore
2809
2810  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2811                             AMDGPUCFGStructurizer *passRep) {
2812    insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2813  } //insertInstrEnd
2814
2815  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2816                             AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
2817    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2818   MachineInstr *newInstr = blk->getParent()
2819      ->CreateMachineInstr(tii->get(newOpcode), DL);
2820
2821    blk->push_back(newInstr);
2822    //assume the instruction doesn't take any reg operand ...
2823
2824    SHOWNEWINSTR(newInstr);
2825  } //insertInstrEnd
2826
2827  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
2828                                         int newOpcode,
2829                                         AMDGPUCFGStructurizer *passRep) {
2830    MachineInstr *oldInstr = &(*instrPos);
2831    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2832    MachineBasicBlock *blk = oldInstr->getParent();
2833    MachineInstr *newInstr =
2834      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
2835                                           DebugLoc());
2836
2837    blk->insert(instrPos, newInstr);
2838    //assume the instruction doesn't take any reg operand ...
2839
2840    SHOWNEWINSTR(newInstr);
2841    return newInstr;
2842  } //insertInstrBefore
2843
2844  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
2845                                     int newOpcode,
2846                                     AMDGPUCFGStructurizer *passRep,
2847                                     DebugLoc DL) {
2848    MachineInstr *oldInstr = &(*instrPos);
2849    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2850    MachineBasicBlock *blk = oldInstr->getParent();
2851    MachineFunction *MF = blk->getParent();
2852    MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2853
2854    blk->insert(instrPos, newInstr);
2855    MachineInstrBuilder MIB(*MF, newInstr);
2856    MIB.addReg(oldInstr->getOperand(1).getReg(), false);
2857
2858    SHOWNEWINSTR(newInstr);
2859    //erase later oldInstr->eraseFromParent();
2860  } //insertCondBranchBefore
2861
2862  static void insertCondBranchBefore(MachineBasicBlock *blk,
2863                                     MachineBasicBlock::iterator insertPos,
2864                                     int newOpcode,
2865                                     AMDGPUCFGStructurizer *passRep,
2866                                     RegiT regNum,
2867                                     DebugLoc DL) {
2868    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2869    MachineFunction *MF = blk->getParent();
2870
2871    MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
2872
2873    //insert before
2874    blk->insert(insertPos, newInstr);
2875    MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2876
2877    SHOWNEWINSTR(newInstr);
2878  } //insertCondBranchBefore
2879
2880  static void insertCondBranchEnd(MachineBasicBlock *blk,
2881                                  int newOpcode,
2882                                  AMDGPUCFGStructurizer *passRep,
2883                                  RegiT regNum) {
2884    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2885    MachineFunction *MF = blk->getParent();
2886    MachineInstr *newInstr =
2887      MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
2888
2889    blk->push_back(newInstr);
2890    MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
2891
2892    SHOWNEWINSTR(newInstr);
2893  } //insertCondBranchEnd
2894
2895
2896  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
2897                                      AMDGPUCFGStructurizer *passRep,
2898                                      RegiT regNum, int regVal) {
2899    MachineInstr *oldInstr = &(*instrPos);
2900    const AMDGPUInstrInfo *tii =
2901             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2902    MachineBasicBlock *blk = oldInstr->getParent();
2903    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2904                                                 regVal);
2905    blk->insert(instrPos, newInstr);
2906
2907    SHOWNEWINSTR(newInstr);
2908  } //insertAssignInstrBefore
2909
2910  static void insertAssignInstrBefore(MachineBasicBlock *blk,
2911                                      AMDGPUCFGStructurizer *passRep,
2912                                      RegiT regNum, int regVal) {
2913    const AMDGPUInstrInfo *tii =
2914             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2915
2916    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
2917                                                 regVal);
2918    if (blk->begin() != blk->end()) {
2919      blk->insert(blk->begin(), newInstr);
2920    } else {
2921      blk->push_back(newInstr);
2922    }
2923
2924    SHOWNEWINSTR(newInstr);
2925
2926  } //insertInstrBefore
2927
2928  static void insertCompareInstrBefore(MachineBasicBlock *blk,
2929                                       MachineBasicBlock::iterator instrPos,
2930                                       AMDGPUCFGStructurizer *passRep,
2931                                       RegiT dstReg, RegiT src1Reg,
2932                                       RegiT src2Reg) {
2933    const AMDGPUInstrInfo *tii =
2934             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
2935    MachineFunction *MF = blk->getParent();
2936    MachineInstr *newInstr =
2937      MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
2938
2939    MachineInstrBuilder MIB(*MF, newInstr);
2940    MIB.addReg(dstReg, RegState::Define); //set target
2941    MIB.addReg(src1Reg); //set src value
2942    MIB.addReg(src2Reg); //set src value
2943
2944    blk->insert(instrPos, newInstr);
2945    SHOWNEWINSTR(newInstr);
2946
2947  } //insertCompareInstrBefore
2948
2949  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
2950                                 MachineBasicBlock *srcBlk) {
2951    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
2952         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
2953      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
2954    }
2955  } //cloneSuccessorList
2956
2957  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
2958    MachineFunction *func = srcBlk->getParent();
2959    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
2960    func->push_back(newBlk);  //insert to function
2961    for (MachineBasicBlock::iterator iter = srcBlk->begin(),
2962         iterEnd = srcBlk->end();
2963         iter != iterEnd; ++iter) {
2964      MachineInstr *instr = func->CloneMachineInstr(iter);
2965      newBlk->push_back(instr);
2966    }
2967    return newBlk;
2968  }
2969
2970  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
2971  //the AMDGPU instruction is not recognized as terminator fix this and retire
2972  //this routine
2973  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
2974                                         MachineBasicBlock *oldBlk,
2975                                         MachineBasicBlock *newBlk) {
2976    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
2977    if (branchInstr && isCondBranch(branchInstr) &&
2978        getTrueBranch(branchInstr) == oldBlk) {
2979      setTrueBranch(branchInstr, newBlk);
2980    }
2981  }
2982
2983  static void wrapup(MachineBasicBlock *entryBlk) {
2984    assert((!entryBlk->getParent()->getJumpTableInfo()
2985            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
2986           && "found a jump table");
2987
2988     //collect continue right before endloop
2989     SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
2990     MachineBasicBlock::iterator pre = entryBlk->begin();
2991     MachineBasicBlock::iterator iterEnd = entryBlk->end();
2992     MachineBasicBlock::iterator iter = pre;
2993     while (iter != iterEnd) {
2994       if (pre->getOpcode() == AMDGPU::CONTINUE
2995           && iter->getOpcode() == AMDGPU::ENDLOOP) {
2996         contInstr.push_back(pre);
2997       }
2998       pre = iter;
2999       ++iter;
3000     } //end while
3001
3002     //delete continue right before endloop
3003     for (unsigned i = 0; i < contInstr.size(); ++i) {
3004        contInstr[i]->eraseFromParent();
3005     }
3006
3007     // TODO to fix up jump table so later phase won't be confused.  if
3008     // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3009     // there isn't such an interface yet.  alternatively, replace all the other
3010     // blocks in the jump table with the entryBlk //}
3011
3012  } //wrapup
3013
3014  static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3015    return &pass.getAnalysis<MachineDominatorTree>();
3016  }
3017
3018  static MachinePostDominatorTree*
3019  getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3020    return &pass.getAnalysis<MachinePostDominatorTree>();
3021  }
3022
3023  static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3024    return &pass.getAnalysis<MachineLoopInfo>();
3025  }
3026}; // template class CFGStructTraits
3027} //end of namespace llvm
3028
3029// createAMDGPUCFGPreparationPass- Returns a pass
3030FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3031                                                 ) {
3032  return new AMDGPUCFGPrepare(tm );
3033}
3034
3035bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3036  return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3037                                                                        *this,
3038                                                                        TRI);
3039}
3040
3041// createAMDGPUCFGStructurizerPass- Returns a pass
3042FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3043                                                  ) {
3044  return new AMDGPUCFGPerform(tm );
3045}
3046
3047bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3048  return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3049                                                                    *this,
3050                                                                    TRI);
3051}
3052