AMDGPUMachineCFGStructurizer.cpp revision 360784
1//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the machine instruction level CFG structurizer pass.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "AMDGPUSubtarget.h"
15#include "SIInstrInfo.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/PostOrderIterator.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstr.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachineRegionInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/CodeGen/TargetOpcodes.h"
32#include "llvm/CodeGen/TargetRegisterInfo.h"
33#include "llvm/Config/llvm-config.h"
34#include "llvm/IR/DebugLoc.h"
35#include "llvm/InitializePasses.h"
36#include "llvm/Pass.h"
37#include "llvm/Support/Compiler.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Support/ErrorHandling.h"
40#include "llvm/Support/raw_ostream.h"
41#include <cassert>
42#include <tuple>
43#include <utility>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "amdgpucfgstructurizer"
48
49namespace {
50
51class PHILinearizeDestIterator;
52
53class PHILinearize {
54  friend class PHILinearizeDestIterator;
55
56public:
57  using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
58
59private:
60  using PHISourcesT = DenseSet<PHISourceT>;
61  using PHIInfoElementT = struct {
62    unsigned DestReg;
63    DebugLoc DL;
64    PHISourcesT Sources;
65  };
66  using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
67  PHIInfoT PHIInfo;
68
69  static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
70  static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
71  static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
72  static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
73                                      MachineBasicBlock *SourceMBB);
74  static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
75                                         unsigned SourceReg,
76                                         MachineBasicBlock *SourceMBB);
77  PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
78  PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
79                                                MachineBasicBlock *SourceMBB);
80
81public:
82  bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
83                          SmallVector<unsigned, 4> &Sources);
84  void addDest(unsigned DestReg, const DebugLoc &DL);
85  void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
86  void deleteDef(unsigned DestReg);
87  void addSource(unsigned DestReg, unsigned SourceReg,
88                 MachineBasicBlock *SourceMBB);
89  void removeSource(unsigned DestReg, unsigned SourceReg,
90                    MachineBasicBlock *SourceMBB = nullptr);
91  bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
92                unsigned &DestReg);
93  bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
94  unsigned getNumSources(unsigned DestReg);
95  void dump(MachineRegisterInfo *MRI);
96  void clear();
97
98  using source_iterator = PHISourcesT::iterator;
99  using dest_iterator = PHILinearizeDestIterator;
100
101  dest_iterator dests_begin();
102  dest_iterator dests_end();
103
104  source_iterator sources_begin(unsigned Reg);
105  source_iterator sources_end(unsigned Reg);
106};
107
108class PHILinearizeDestIterator {
109private:
110  PHILinearize::PHIInfoT::iterator Iter;
111
112public:
113  PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
114
115  unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
116  PHILinearizeDestIterator &operator++() {
117    ++Iter;
118    return *this;
119  }
120  bool operator==(const PHILinearizeDestIterator &I) const {
121    return I.Iter == Iter;
122  }
123  bool operator!=(const PHILinearizeDestIterator &I) const {
124    return I.Iter != Iter;
125  }
126};
127
128} // end anonymous namespace
129
130unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
131  return Info->DestReg;
132}
133
134void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
135                                        unsigned NewDef) {
136  Info->DestReg = NewDef;
137}
138
139PHILinearize::PHISourcesT &
140PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
141  return Info->Sources;
142}
143
144void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
145                                           unsigned SourceReg,
146                                           MachineBasicBlock *SourceMBB) {
147  // Assertion ensures we don't use the same SourceMBB for the
148  // sources, because we cannot have different registers with
149  // identical predecessors, but we can have the same register for
150  // multiple predecessors.
151#if !defined(NDEBUG)
152  for (auto SI : phiInfoElementGetSources(Info)) {
153    assert((SI.second != SourceMBB || SourceReg == SI.first));
154  }
155#endif
156
157  phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
158}
159
160void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
161                                              unsigned SourceReg,
162                                              MachineBasicBlock *SourceMBB) {
163  auto &Sources = phiInfoElementGetSources(Info);
164  SmallVector<PHISourceT, 4> ElimiatedSources;
165  for (auto SI : Sources) {
166    if (SI.first == SourceReg &&
167        (SI.second == nullptr || SI.second == SourceMBB)) {
168      ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
169    }
170  }
171
172  for (auto &Source : ElimiatedSources) {
173    Sources.erase(Source);
174  }
175}
176
177PHILinearize::PHIInfoElementT *
178PHILinearize::findPHIInfoElement(unsigned DestReg) {
179  for (auto I : PHIInfo) {
180    if (phiInfoElementGetDest(I) == DestReg) {
181      return I;
182    }
183  }
184  return nullptr;
185}
186
187PHILinearize::PHIInfoElementT *
188PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
189                                           MachineBasicBlock *SourceMBB) {
190  for (auto I : PHIInfo) {
191    for (auto SI : phiInfoElementGetSources(I)) {
192      if (SI.first == SourceReg &&
193          (SI.second == nullptr || SI.second == SourceMBB)) {
194        return I;
195      }
196    }
197  }
198  return nullptr;
199}
200
201bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
202                                      SmallVector<unsigned, 4> &Sources) {
203  bool FoundSource = false;
204  for (auto I : PHIInfo) {
205    for (auto SI : phiInfoElementGetSources(I)) {
206      if (SI.second == SourceMBB) {
207        FoundSource = true;
208        Sources.push_back(SI.first);
209      }
210    }
211  }
212  return FoundSource;
213}
214
215void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
216  assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
217  PHISourcesT EmptySet;
218  PHIInfoElementT *NewElement = new PHIInfoElementT();
219  NewElement->DestReg = DestReg;
220  NewElement->DL = DL;
221  NewElement->Sources = EmptySet;
222  PHIInfo.insert(NewElement);
223}
224
225void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
226  phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
227}
228
229void PHILinearize::deleteDef(unsigned DestReg) {
230  PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
231  PHIInfo.erase(InfoElement);
232  delete InfoElement;
233}
234
235void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
236                             MachineBasicBlock *SourceMBB) {
237  phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
238}
239
240void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
241                                MachineBasicBlock *SourceMBB) {
242  phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
243}
244
245bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
246                            unsigned &DestReg) {
247  PHIInfoElementT *InfoElement =
248      findPHIInfoElementFromSource(SourceReg, SourceMBB);
249  if (InfoElement != nullptr) {
250    DestReg = phiInfoElementGetDest(InfoElement);
251    return true;
252  }
253  return false;
254}
255
256bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
257  unsigned DestReg;
258  return findDest(Reg, SourceMBB, DestReg);
259}
260
261unsigned PHILinearize::getNumSources(unsigned DestReg) {
262  return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
263}
264
265#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
266LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) {
267  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
268  dbgs() << "=PHIInfo Start=\n";
269  for (auto PII : this->PHIInfo) {
270    PHIInfoElementT &Element = *PII;
271    dbgs() << "Dest: " << printReg(Element.DestReg, TRI)
272           << " Sources: {";
273    for (auto &SI : Element.Sources) {
274      dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second)
275             << "),";
276    }
277    dbgs() << "}\n";
278  }
279  dbgs() << "=PHIInfo End=\n";
280}
281#endif
282
283void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
284
285PHILinearize::dest_iterator PHILinearize::dests_begin() {
286  return PHILinearizeDestIterator(PHIInfo.begin());
287}
288
289PHILinearize::dest_iterator PHILinearize::dests_end() {
290  return PHILinearizeDestIterator(PHIInfo.end());
291}
292
293PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
294  auto InfoElement = findPHIInfoElement(Reg);
295  return phiInfoElementGetSources(InfoElement).begin();
296}
297
298PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
299  auto InfoElement = findPHIInfoElement(Reg);
300  return phiInfoElementGetSources(InfoElement).end();
301}
302
303static unsigned getPHINumInputs(MachineInstr &PHI) {
304  assert(PHI.isPHI());
305  return (PHI.getNumOperands() - 1) / 2;
306}
307
308static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) {
309  assert(PHI.isPHI());
310  return PHI.getOperand(Index * 2 + 2).getMBB();
311}
312
313static void setPhiPred(MachineInstr &PHI, unsigned Index,
314                       MachineBasicBlock *NewPred) {
315  PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
316}
317
318static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
319  assert(PHI.isPHI());
320  return PHI.getOperand(Index * 2 + 1).getReg();
321}
322
323static unsigned getPHIDestReg(MachineInstr &PHI) {
324  assert(PHI.isPHI());
325  return PHI.getOperand(0).getReg();
326}
327
328namespace {
329
330class RegionMRT;
331class MBBMRT;
332
333class LinearizedRegion {
334protected:
335  MachineBasicBlock *Entry;
336  // The exit block is part of the region, and is the last
337  // merge block before exiting the region.
338  MachineBasicBlock *Exit;
339  DenseSet<unsigned> LiveOuts;
340  SmallPtrSet<MachineBasicBlock *, 1> MBBs;
341  bool HasLoop;
342  LinearizedRegion *Parent;
343  RegionMRT *RMRT;
344
345  void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
346                       MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
347                       const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
348
349  void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
350                             MachineInstr *DefInstr,
351                             const MachineRegisterInfo *MRI,
352                             const TargetRegisterInfo *TRI,
353                             PHILinearize &PHIInfo);
354
355  void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
356                        const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
357                        RegionMRT *TopRegion);
358
359  void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
360                     const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
361
362  void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
363                     const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
364                     RegionMRT *TopRegion = nullptr);
365
366public:
367  LinearizedRegion();
368  LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
369                   const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
370  ~LinearizedRegion() = default;
371
372  void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
373
374  RegionMRT *getRegionMRT() { return RMRT; }
375
376  void setParent(LinearizedRegion *P) { Parent = P; }
377
378  LinearizedRegion *getParent() { return Parent; }
379
380  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
381
382  void setBBSelectRegIn(unsigned Reg);
383
384  unsigned getBBSelectRegIn();
385
386  void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
387
388  unsigned getBBSelectRegOut();
389
390  void setHasLoop(bool Value);
391
392  bool getHasLoop();
393
394  void addLiveOut(unsigned VReg);
395
396  void removeLiveOut(unsigned Reg);
397
398  void replaceLiveOut(unsigned OldReg, unsigned NewReg);
399
400  void replaceRegister(unsigned Register, unsigned NewRegister,
401                       MachineRegisterInfo *MRI, bool ReplaceInside,
402                       bool ReplaceOutside, bool IncludeLoopPHIs);
403
404  void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
405                                   bool IncludeLoopPHIs,
406                                   MachineRegisterInfo *MRI);
407
408  void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
409                                    bool IncludeLoopPHIs,
410                                    MachineRegisterInfo *MRI);
411
412  DenseSet<unsigned> *getLiveOuts();
413
414  void setEntry(MachineBasicBlock *NewEntry);
415
416  MachineBasicBlock *getEntry();
417
418  void setExit(MachineBasicBlock *NewExit);
419
420  MachineBasicBlock *getExit();
421
422  void addMBB(MachineBasicBlock *MBB);
423
424  void addMBBs(LinearizedRegion *InnerRegion);
425
426  bool contains(MachineBasicBlock *MBB);
427
428  bool isLiveOut(unsigned Reg);
429
430  bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
431
432  void removeFalseRegisterKills(MachineRegisterInfo *MRI);
433
434  void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
435                   const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
436};
437
438class MRT {
439protected:
440  RegionMRT *Parent;
441  unsigned BBSelectRegIn;
442  unsigned BBSelectRegOut;
443
444public:
445  virtual ~MRT() = default;
446
447  unsigned getBBSelectRegIn() { return BBSelectRegIn; }
448
449  unsigned getBBSelectRegOut() { return BBSelectRegOut; }
450
451  void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
452
453  void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
454
455  virtual RegionMRT *getRegionMRT() { return nullptr; }
456
457  virtual MBBMRT *getMBBMRT() { return nullptr; }
458
459  bool isRegion() { return getRegionMRT() != nullptr; }
460
461  bool isMBB() { return getMBBMRT() != nullptr; }
462
463  bool isRoot() { return Parent == nullptr; }
464
465  void setParent(RegionMRT *Region) { Parent = Region; }
466
467  RegionMRT *getParent() { return Parent; }
468
469  static MachineBasicBlock *
470  initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
471                DenseMap<MachineRegion *, RegionMRT *> &RegionMap);
472
473  static RegionMRT *buildMRT(MachineFunction &MF,
474                             const MachineRegionInfo *RegionInfo,
475                             const SIInstrInfo *TII,
476                             MachineRegisterInfo *MRI);
477
478  virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
479
480  void dumpDepth(int depth) {
481    for (int i = depth; i > 0; --i) {
482      dbgs() << "  ";
483    }
484  }
485};
486
487class MBBMRT : public MRT {
488  MachineBasicBlock *MBB;
489
490public:
491  MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
492    setParent(nullptr);
493    setBBSelectRegOut(0);
494    setBBSelectRegIn(0);
495  }
496
497  MBBMRT *getMBBMRT() override { return this; }
498
499  MachineBasicBlock *getMBB() { return MBB; }
500
501  void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
502    dumpDepth(depth);
503    dbgs() << "MBB: " << getMBB()->getNumber();
504    dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
505    dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
506  }
507};
508
509class RegionMRT : public MRT {
510protected:
511  MachineRegion *Region;
512  LinearizedRegion *LRegion = nullptr;
513  MachineBasicBlock *Succ = nullptr;
514  SetVector<MRT *> Children;
515
516public:
517  RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) {
518    setParent(nullptr);
519    setBBSelectRegOut(0);
520    setBBSelectRegIn(0);
521  }
522
523  ~RegionMRT() override {
524    if (LRegion) {
525      delete LRegion;
526    }
527
528    for (auto CI : Children) {
529      delete &(*CI);
530    }
531  }
532
533  RegionMRT *getRegionMRT() override { return this; }
534
535  void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
536    LRegion = LinearizeRegion;
537  }
538
539  LinearizedRegion *getLinearizedRegion() { return LRegion; }
540
541  MachineRegion *getMachineRegion() { return Region; }
542
543  unsigned getInnerOutputRegister() {
544    return (*(Children.begin()))->getBBSelectRegOut();
545  }
546
547  void addChild(MRT *Tree) { Children.insert(Tree); }
548
549  SetVector<MRT *> *getChildren() { return &Children; }
550
551  void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
552    dumpDepth(depth);
553    dbgs() << "Region: " << (void *)Region;
554    dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
555    dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
556
557    dumpDepth(depth);
558    if (getSucc())
559      dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
560    else
561      dbgs() << "Succ: none \n";
562    for (auto MRTI : Children) {
563      MRTI->dump(TRI, depth + 1);
564    }
565  }
566
567  MRT *getEntryTree() { return Children.back(); }
568
569  MRT *getExitTree() { return Children.front(); }
570
571  MachineBasicBlock *getEntry() {
572    MRT *Tree = Children.back();
573    return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
574                              : Tree->getMBBMRT()->getMBB();
575  }
576
577  MachineBasicBlock *getExit() {
578    MRT *Tree = Children.front();
579    return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
580                              : Tree->getMBBMRT()->getMBB();
581  }
582
583  void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
584
585  MachineBasicBlock *getSucc() { return Succ; }
586
587  bool contains(MachineBasicBlock *MBB) {
588    for (auto CI : Children) {
589      if (CI->isMBB()) {
590        if (MBB == CI->getMBBMRT()->getMBB()) {
591          return true;
592        }
593      } else {
594        if (CI->getRegionMRT()->contains(MBB)) {
595          return true;
596        } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
597                   CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
598          return true;
599        }
600      }
601    }
602    return false;
603  }
604
605  void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
606    LinearizedRegion *LRegion = getLinearizedRegion();
607    LRegion->replaceLiveOut(Register, NewRegister);
608    for (auto &CI : Children) {
609      if (CI->isRegion()) {
610        CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
611      }
612    }
613  }
614};
615
616} // end anonymous namespace
617
618static unsigned createBBSelectReg(const SIInstrInfo *TII,
619                                  MachineRegisterInfo *MRI) {
620  return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
621}
622
623MachineBasicBlock *
624MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
625                   DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
626  for (auto &MFI : MF) {
627    MachineBasicBlock *ExitMBB = &MFI;
628    if (ExitMBB->succ_size() == 0) {
629      return ExitMBB;
630    }
631  }
632  llvm_unreachable("CFG has no exit block");
633  return nullptr;
634}
635
636RegionMRT *MRT::buildMRT(MachineFunction &MF,
637                         const MachineRegionInfo *RegionInfo,
638                         const SIInstrInfo *TII, MachineRegisterInfo *MRI) {
639  SmallPtrSet<MachineRegion *, 4> PlacedRegions;
640  DenseMap<MachineRegion *, RegionMRT *> RegionMap;
641  MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
642  RegionMRT *Result = new RegionMRT(TopLevelRegion);
643  RegionMap[TopLevelRegion] = Result;
644
645  // Insert the exit block first, we need it to be the merge node
646  // for the top level region.
647  MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
648
649  unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
650  MBBMRT *ExitMRT = new MBBMRT(Exit);
651  RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
652  ExitMRT->setBBSelectRegIn(BBSelectRegIn);
653
654  for (auto MBBI : post_order(&(MF.front()))) {
655    MachineBasicBlock *MBB = &(*MBBI);
656
657    // Skip Exit since we already added it
658    if (MBB == Exit) {
659      continue;
660    }
661
662    LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
663    MBBMRT *NewMBB = new MBBMRT(MBB);
664    MachineRegion *Region = RegionInfo->getRegionFor(MBB);
665
666    // Ensure we have the MRT region
667    if (RegionMap.count(Region) == 0) {
668      RegionMRT *NewMRTRegion = new RegionMRT(Region);
669      RegionMap[Region] = NewMRTRegion;
670
671      // Ensure all parents are in the RegionMap
672      MachineRegion *Parent = Region->getParent();
673      while (RegionMap.count(Parent) == 0) {
674        RegionMRT *NewMRTParent = new RegionMRT(Parent);
675        NewMRTParent->addChild(NewMRTRegion);
676        NewMRTRegion->setParent(NewMRTParent);
677        RegionMap[Parent] = NewMRTParent;
678        NewMRTRegion = NewMRTParent;
679        Parent = Parent->getParent();
680      }
681      RegionMap[Parent]->addChild(NewMRTRegion);
682      NewMRTRegion->setParent(RegionMap[Parent]);
683    }
684
685    // Add MBB to Region MRT
686    RegionMap[Region]->addChild(NewMBB);
687    NewMBB->setParent(RegionMap[Region]);
688    RegionMap[Region]->setSucc(Region->getExit());
689  }
690  return Result;
691}
692
693void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
694                                       MachineInstr *DefInstr,
695                                       const MachineRegisterInfo *MRI,
696                                       const TargetRegisterInfo *TRI,
697                                       PHILinearize &PHIInfo) {
698  if (Register::isVirtualRegister(Reg)) {
699    LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
700                      << "\n");
701    // If this is a source register to a PHI we are chaining, it
702    // must be live out.
703    if (PHIInfo.isSource(Reg)) {
704      LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
705      addLiveOut(Reg);
706    } else {
707      // If this is live out of the MBB
708      for (auto &UI : MRI->use_operands(Reg)) {
709        if (UI.getParent()->getParent() != MBB) {
710          LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
711                            << "): " << printReg(Reg, TRI) << "\n");
712          addLiveOut(Reg);
713        } else {
714          // If the use is in the same MBB we have to make sure
715          // it is after the def, otherwise it is live out in a loop
716          MachineInstr *UseInstr = UI.getParent();
717          for (MachineBasicBlock::instr_iterator
718                   MII = UseInstr->getIterator(),
719                   MIE = UseInstr->getParent()->instr_end();
720               MII != MIE; ++MII) {
721            if ((&(*MII)) == DefInstr) {
722              LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
723                                << "\n");
724              addLiveOut(Reg);
725            }
726          }
727        }
728      }
729    }
730  }
731}
732
733void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
734                                             MachineInstr *DefInstr,
735                                             const MachineRegisterInfo *MRI,
736                                             const TargetRegisterInfo *TRI,
737                                             PHILinearize &PHIInfo) {
738  if (Register::isVirtualRegister(Reg)) {
739    LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
740                      << "\n");
741    for (auto &UI : MRI->use_operands(Reg)) {
742      if (!Region->contains(UI.getParent()->getParent())) {
743        LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
744                          << "): " << printReg(Reg, TRI) << "\n");
745        addLiveOut(Reg);
746      }
747    }
748  }
749}
750
751void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
752                                     const MachineRegisterInfo *MRI,
753                                     const TargetRegisterInfo *TRI,
754                                     PHILinearize &PHIInfo) {
755  LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
756                    << ")-\n");
757  for (auto &II : *MBB) {
758    for (auto &RI : II.defs()) {
759      storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
760    }
761    for (auto &IRI : II.implicit_operands()) {
762      if (IRI.isDef()) {
763        storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
764      }
765    }
766  }
767
768  // If we have a successor with a PHI, source coming from this MBB we have to
769  // add the register as live out
770  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
771                                        E = MBB->succ_end();
772       SI != E; ++SI) {
773    for (auto &II : *(*SI)) {
774      if (II.isPHI()) {
775        MachineInstr &PHI = II;
776        int numPreds = getPHINumInputs(PHI);
777        for (int i = 0; i < numPreds; ++i) {
778          if (getPHIPred(PHI, i) == MBB) {
779            unsigned PHIReg = getPHISourceReg(PHI, i);
780            LLVM_DEBUG(dbgs()
781                       << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
782                       << " -> " << printMBBReference(*(*SI))
783                       << "): " << printReg(PHIReg, TRI) << "\n");
784            addLiveOut(PHIReg);
785          }
786        }
787      }
788    }
789  }
790
791  LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n");
792}
793
794void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
795                                        const MachineRegisterInfo *MRI,
796                                        const TargetRegisterInfo *TRI,
797                                        PHILinearize &PHIInfo,
798                                        RegionMRT *TopRegion) {
799  for (auto &II : *MBB) {
800    for (auto &RI : II.defs()) {
801      storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
802                            PHIInfo);
803    }
804    for (auto &IRI : II.implicit_operands()) {
805      if (IRI.isDef()) {
806        storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
807                              TRI, PHIInfo);
808      }
809    }
810  }
811}
812
813void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
814                                     const MachineRegisterInfo *MRI,
815                                     const TargetRegisterInfo *TRI,
816                                     PHILinearize &PHIInfo,
817                                     RegionMRT *CurrentTopRegion) {
818  MachineBasicBlock *Exit = Region->getSucc();
819
820  RegionMRT *TopRegion =
821      CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
822
823  // Check if exit is end of function, if so, no live outs.
824  if (Exit == nullptr)
825    return;
826
827  auto Children = Region->getChildren();
828  for (auto CI : *Children) {
829    if (CI->isMBB()) {
830      auto MBB = CI->getMBBMRT()->getMBB();
831      storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
832    } else {
833      LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
834      // We should be limited to only store registers that are live out from the
835      // lineaized region
836      for (auto MBBI : SubRegion->MBBs) {
837        storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
838      }
839    }
840  }
841
842  if (CurrentTopRegion == nullptr) {
843    auto Succ = Region->getSucc();
844    for (auto &II : *Succ) {
845      if (II.isPHI()) {
846        MachineInstr &PHI = II;
847        int numPreds = getPHINumInputs(PHI);
848        for (int i = 0; i < numPreds; ++i) {
849          if (Region->contains(getPHIPred(PHI, i))) {
850            unsigned PHIReg = getPHISourceReg(PHI, i);
851            LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
852                              << "): " << printReg(PHIReg, TRI) << "\n");
853            addLiveOut(PHIReg);
854          }
855        }
856      }
857    }
858  }
859}
860
861#ifndef NDEBUG
862void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
863  OS << "Linearized Region {";
864  bool IsFirst = true;
865  for (auto MBB : MBBs) {
866    if (IsFirst) {
867      IsFirst = false;
868    } else {
869      OS << " ,";
870    }
871    OS << MBB->getNumber();
872  }
873  OS << "} (" << Entry->getNumber() << ", "
874     << (Exit == nullptr ? -1 : Exit->getNumber())
875     << "): In:" << printReg(getBBSelectRegIn(), TRI)
876     << " Out:" << printReg(getBBSelectRegOut(), TRI) << " {";
877  for (auto &LI : LiveOuts) {
878    OS << printReg(LI, TRI) << " ";
879  }
880  OS << "} \n";
881}
882#endif
883
884unsigned LinearizedRegion::getBBSelectRegIn() {
885  return getRegionMRT()->getBBSelectRegIn();
886}
887
888unsigned LinearizedRegion::getBBSelectRegOut() {
889  return getRegionMRT()->getBBSelectRegOut();
890}
891
892void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
893
894bool LinearizedRegion::getHasLoop() { return HasLoop; }
895
896void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
897
898void LinearizedRegion::removeLiveOut(unsigned Reg) {
899  if (isLiveOut(Reg))
900    LiveOuts.erase(Reg);
901}
902
903void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
904  if (isLiveOut(OldReg)) {
905    removeLiveOut(OldReg);
906    addLiveOut(NewReg);
907  }
908}
909
910void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
911                                       MachineRegisterInfo *MRI,
912                                       bool ReplaceInside, bool ReplaceOutside,
913                                       bool IncludeLoopPHI) {
914  assert(Register != NewRegister && "Cannot replace a reg with itself");
915
916  LLVM_DEBUG(
917      dbgs() << "Pepareing to replace register (region): "
918             << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
919             << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
920
921  // If we are replacing outside, we also need to update the LiveOuts
922  if (ReplaceOutside &&
923      (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
924    LinearizedRegion *Current = this;
925    while (Current != nullptr && Current->getEntry() != nullptr) {
926      LLVM_DEBUG(dbgs() << "Region before register replace\n");
927      LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
928      Current->replaceLiveOut(Register, NewRegister);
929      LLVM_DEBUG(dbgs() << "Region after register replace\n");
930      LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
931      Current = Current->getParent();
932    }
933  }
934
935  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
936                                         E = MRI->reg_end();
937       I != E;) {
938    MachineOperand &O = *I;
939    ++I;
940
941    // We don't rewrite defs.
942    if (O.isDef())
943      continue;
944
945    bool IsInside = contains(O.getParent()->getParent());
946    bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
947                                  O.getParent()->getParent() == getEntry());
948    bool ShouldReplace = (IsInside && ReplaceInside) ||
949                         (!IsInside && ReplaceOutside) ||
950                         (IncludeLoopPHI && IsLoopPHI);
951    if (ShouldReplace) {
952
953      if (Register::isPhysicalRegister(NewRegister)) {
954        LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
955                          << printReg(NewRegister, MRI->getTargetRegisterInfo())
956                          << "\n");
957        llvm_unreachable("Cannot substitute physical registers");
958      } else {
959        LLVM_DEBUG(dbgs() << "Replacing register (region): "
960                          << printReg(Register, MRI->getTargetRegisterInfo())
961                          << " with "
962                          << printReg(NewRegister, MRI->getTargetRegisterInfo())
963                          << "\n");
964        O.setReg(NewRegister);
965      }
966    }
967  }
968}
969
970void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
971                                                   unsigned NewRegister,
972                                                   bool IncludeLoopPHIs,
973                                                   MachineRegisterInfo *MRI) {
974  replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
975}
976
977void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
978                                                    unsigned NewRegister,
979                                                    bool IncludeLoopPHIs,
980                                                    MachineRegisterInfo *MRI) {
981  replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
982}
983
984DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
985
986void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
987  Entry = NewEntry;
988}
989
990MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
991
992void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
993
994MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
995
996void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
997
998void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
999  for (auto MBB : InnerRegion->MBBs) {
1000    addMBB(MBB);
1001  }
1002}
1003
1004bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
1005  return MBBs.count(MBB) == 1;
1006}
1007
1008bool LinearizedRegion::isLiveOut(unsigned Reg) {
1009  return LiveOuts.count(Reg) == 1;
1010}
1011
1012bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
1013  return MRI->def_begin(Reg) == MRI->def_end();
1014}
1015
1016// After the code has been structurized, what was flagged as kills
1017// before are no longer register kills.
1018void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
1019  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1020  (void)TRI; // It's used by LLVM_DEBUG.
1021
1022  for (auto MBBI : MBBs) {
1023    MachineBasicBlock *MBB = MBBI;
1024    for (auto &II : *MBB) {
1025      for (auto &RI : II.uses()) {
1026        if (RI.isReg()) {
1027          Register Reg = RI.getReg();
1028          if (Register::isVirtualRegister(Reg)) {
1029            if (hasNoDef(Reg, MRI))
1030              continue;
1031            if (!MRI->hasOneDef(Reg)) {
1032              LLVM_DEBUG(this->getEntry()->getParent()->dump());
1033              LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
1034            }
1035
1036            if (MRI->def_begin(Reg) == MRI->def_end()) {
1037              LLVM_DEBUG(dbgs() << "Register "
1038                                << printReg(Reg, MRI->getTargetRegisterInfo())
1039                                << " has NO defs\n");
1040            } else if (!MRI->hasOneDef(Reg)) {
1041              LLVM_DEBUG(dbgs() << "Register "
1042                                << printReg(Reg, MRI->getTargetRegisterInfo())
1043                                << " has multiple defs\n");
1044            }
1045
1046            assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1047            MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1048            MachineOperand *UseOperand = &(RI);
1049            bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1050            if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1051              LLVM_DEBUG(dbgs() << "Removing kill flag on register: "
1052                                << printReg(Reg, TRI) << "\n");
1053              UseOperand->setIsKill(false);
1054            }
1055          }
1056        }
1057      }
1058    }
1059  }
1060}
1061
1062void LinearizedRegion::initLiveOut(RegionMRT *Region,
1063                                   const MachineRegisterInfo *MRI,
1064                                   const TargetRegisterInfo *TRI,
1065                                   PHILinearize &PHIInfo) {
1066  storeLiveOuts(Region, MRI, TRI, PHIInfo);
1067}
1068
1069LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1070                                   const MachineRegisterInfo *MRI,
1071                                   const TargetRegisterInfo *TRI,
1072                                   PHILinearize &PHIInfo) {
1073  setEntry(MBB);
1074  setExit(MBB);
1075  storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1076  MBBs.insert(MBB);
1077  Parent = nullptr;
1078}
1079
1080LinearizedRegion::LinearizedRegion() {
1081  setEntry(nullptr);
1082  setExit(nullptr);
1083  Parent = nullptr;
1084}
1085
1086namespace {
1087
1088class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1089private:
1090  const MachineRegionInfo *Regions;
1091  const SIInstrInfo *TII;
1092  const TargetRegisterInfo *TRI;
1093  MachineRegisterInfo *MRI;
1094  unsigned BBSelectRegister;
1095  PHILinearize PHIInfo;
1096  DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
1097  RegionMRT *RMRT;
1098
1099  void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1100                           SmallVector<unsigned, 2> &RegionIndices);
1101  void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1102                           SmallVector<unsigned, 2> &RegionIndices);
1103  void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1104                              SmallVector<unsigned, 2> &PHINonRegionIndices);
1105
1106  void storePHILinearizationInfoDest(
1107      unsigned LDestReg, MachineInstr &PHI,
1108      SmallVector<unsigned, 2> *RegionIndices = nullptr);
1109
1110  unsigned storePHILinearizationInfo(MachineInstr &PHI,
1111                                     SmallVector<unsigned, 2> *RegionIndices);
1112
1113  void extractKilledPHIs(MachineBasicBlock *MBB);
1114
1115  bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1116                 unsigned *ReplaceReg);
1117
1118  bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1119                 MachineBasicBlock *SourceMBB,
1120                 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1121
1122  void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1123                  MachineBasicBlock *LastMerge,
1124                  SmallVector<unsigned, 2> &PHIRegionIndices);
1125  void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1126                       MachineBasicBlock *IfMBB,
1127                       SmallVector<unsigned, 2> &PHIRegionIndices);
1128  void replaceLiveOutRegs(MachineInstr &PHI,
1129                          SmallVector<unsigned, 2> &PHIRegionIndices,
1130                          unsigned CombinedSourceReg,
1131                          LinearizedRegion *LRegion);
1132  void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1133                            MachineInstr &PHI, LinearizedRegion *LRegion);
1134
1135  void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1136                             LinearizedRegion *LRegion);
1137  void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1138                             MachineInstr &PHI);
1139  void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1140                              MachineBasicBlock *IfMBB);
1141
1142  bool regionIsSimpleIf(RegionMRT *Region);
1143
1144  void transformSimpleIfRegion(RegionMRT *Region);
1145
1146  void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
1147
1148  void insertUnconditionalBranch(MachineBasicBlock *MBB,
1149                                 MachineBasicBlock *Dest,
1150                                 const DebugLoc &DL = DebugLoc());
1151
1152  MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1153
1154  void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1155                      MachineBasicBlock *MergeBB, unsigned DestRegister,
1156                      unsigned IfSourceRegister, unsigned CodeSourceRegister,
1157                      bool IsUndefIfSource = false);
1158
1159  MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1160                                   MachineBasicBlock *CodeBBStart,
1161                                   MachineBasicBlock *CodeBBEnd,
1162                                   MachineBasicBlock *SelectBB, unsigned IfReg,
1163                                   bool InheritPreds);
1164
1165  void prunePHIInfo(MachineBasicBlock *MBB);
1166  void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1167
1168  void createEntryPHIs(LinearizedRegion *CurrentRegion);
1169  void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1170
1171  void replaceRegisterWith(unsigned Register, unsigned NewRegister);
1172
1173  MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1174                                    MachineBasicBlock *CodeBB,
1175                                    LinearizedRegion *LRegion,
1176                                    unsigned BBSelectRegIn,
1177                                    unsigned BBSelectRegOut);
1178
1179  MachineBasicBlock *
1180  createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1181                 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1182                 unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1183  void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1184
1185  void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1186                               MachineBasicBlock *MergeBB,
1187                               unsigned BBSelectReg);
1188
1189  MachineInstr *getDefInstr(unsigned Reg);
1190  void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1191                        MachineBasicBlock *MergeBB,
1192                        LinearizedRegion *InnerRegion, unsigned DestReg,
1193                        unsigned SourceReg);
1194  bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1195                   unsigned Register);
1196  void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1197                          MachineBasicBlock *MergeBB,
1198                          LinearizedRegion *InnerRegion,
1199                          LinearizedRegion *LRegion);
1200
1201  void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1202                    MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1203  void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1204                     LinearizedRegion *LRegion);
1205
1206  MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1207
1208  MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1209
1210  LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1211
1212  bool structurizeComplexRegion(RegionMRT *Region);
1213
1214  bool structurizeRegion(RegionMRT *Region);
1215
1216  bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1217
1218public:
1219  static char ID;
1220
1221  AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1222    initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
1223  }
1224
1225  void getAnalysisUsage(AnalysisUsage &AU) const override {
1226    AU.addRequired<MachineRegionInfoPass>();
1227    MachineFunctionPass::getAnalysisUsage(AU);
1228  }
1229
1230  void initFallthroughMap(MachineFunction &MF);
1231
1232  void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1233
1234  unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1235                                     MachineRegisterInfo *MRI,
1236                                     const SIInstrInfo *TII);
1237
1238  void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1239
1240  RegionMRT *getRegionMRT() { return RMRT; }
1241
1242  bool runOnMachineFunction(MachineFunction &MF) override;
1243};
1244
1245} // end anonymous namespace
1246
1247char AMDGPUMachineCFGStructurizer::ID = 0;
1248
1249bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1250  MachineBasicBlock *Entry = Region->getEntry();
1251  MachineBasicBlock *Succ = Region->getSucc();
1252  bool FoundBypass = false;
1253  bool FoundIf = false;
1254
1255  if (Entry->succ_size() != 2) {
1256    return false;
1257  }
1258
1259  for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
1260                                              E = Entry->succ_end();
1261       SI != E; ++SI) {
1262    MachineBasicBlock *Current = *SI;
1263
1264    if (Current == Succ) {
1265      FoundBypass = true;
1266    } else if ((Current->succ_size() == 1) &&
1267               *(Current->succ_begin()) == Succ) {
1268      FoundIf = true;
1269    }
1270  }
1271
1272  return FoundIf && FoundBypass;
1273}
1274
1275void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1276  MachineBasicBlock *Entry = Region->getEntry();
1277  MachineBasicBlock *Exit = Region->getExit();
1278  TII->convertNonUniformIfRegion(Entry, Exit);
1279}
1280
1281static void fixMBBTerminator(MachineBasicBlock *MBB) {
1282  if (MBB->succ_size() == 1) {
1283    auto *Succ = *(MBB->succ_begin());
1284    for (auto &TI : MBB->terminators()) {
1285      for (auto &UI : TI.uses()) {
1286        if (UI.isMBB() && UI.getMBB() != Succ) {
1287          UI.setMBB(Succ);
1288        }
1289      }
1290    }
1291  }
1292}
1293
1294static void fixRegionTerminator(RegionMRT *Region) {
1295  MachineBasicBlock *InternalSucc = nullptr;
1296  MachineBasicBlock *ExternalSucc = nullptr;
1297  LinearizedRegion *LRegion = Region->getLinearizedRegion();
1298  auto Exit = LRegion->getExit();
1299
1300  SmallPtrSet<MachineBasicBlock *, 2> Successors;
1301  for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
1302                                              SE = Exit->succ_end();
1303       SI != SE; ++SI) {
1304    MachineBasicBlock *Succ = *SI;
1305    if (LRegion->contains(Succ)) {
1306      // Do not allow re-assign
1307      assert(InternalSucc == nullptr);
1308      InternalSucc = Succ;
1309    } else {
1310      // Do not allow re-assign
1311      assert(ExternalSucc == nullptr);
1312      ExternalSucc = Succ;
1313    }
1314  }
1315
1316  for (auto &TI : Exit->terminators()) {
1317    for (auto &UI : TI.uses()) {
1318      if (UI.isMBB()) {
1319        auto Target = UI.getMBB();
1320        if (Target != InternalSucc && Target != ExternalSucc) {
1321          UI.setMBB(ExternalSucc);
1322        }
1323      }
1324    }
1325  }
1326}
1327
1328// If a region region is just a sequence of regions (and the exit
1329// block in the case of the top level region), we can simply skip
1330// linearizing it, because it is already linear
1331bool regionIsSequence(RegionMRT *Region) {
1332  auto Children = Region->getChildren();
1333  for (auto CI : *Children) {
1334    if (!CI->isRegion()) {
1335      if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1336        return false;
1337      }
1338    }
1339  }
1340  return true;
1341}
1342
1343void fixupRegionExits(RegionMRT *Region) {
1344  auto Children = Region->getChildren();
1345  for (auto CI : *Children) {
1346    if (!CI->isRegion()) {
1347      fixMBBTerminator(CI->getMBBMRT()->getMBB());
1348    } else {
1349      fixRegionTerminator(CI->getRegionMRT());
1350    }
1351  }
1352}
1353
1354void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1355    RegionMRT *Region, MachineInstr &PHI,
1356    SmallVector<unsigned, 2> &PHIRegionIndices) {
1357  unsigned NumInputs = getPHINumInputs(PHI);
1358  for (unsigned i = 0; i < NumInputs; ++i) {
1359    MachineBasicBlock *Pred = getPHIPred(PHI, i);
1360    if (Region->contains(Pred)) {
1361      PHIRegionIndices.push_back(i);
1362    }
1363  }
1364}
1365
1366void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1367    LinearizedRegion *Region, MachineInstr &PHI,
1368    SmallVector<unsigned, 2> &PHIRegionIndices) {
1369  unsigned NumInputs = getPHINumInputs(PHI);
1370  for (unsigned i = 0; i < NumInputs; ++i) {
1371    MachineBasicBlock *Pred = getPHIPred(PHI, i);
1372    if (Region->contains(Pred)) {
1373      PHIRegionIndices.push_back(i);
1374    }
1375  }
1376}
1377
1378void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1379    LinearizedRegion *Region, MachineInstr &PHI,
1380    SmallVector<unsigned, 2> &PHINonRegionIndices) {
1381  unsigned NumInputs = getPHINumInputs(PHI);
1382  for (unsigned i = 0; i < NumInputs; ++i) {
1383    MachineBasicBlock *Pred = getPHIPred(PHI, i);
1384    if (!Region->contains(Pred)) {
1385      PHINonRegionIndices.push_back(i);
1386    }
1387  }
1388}
1389
1390void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1391    unsigned LDestReg, MachineInstr &PHI,
1392    SmallVector<unsigned, 2> *RegionIndices) {
1393  if (RegionIndices) {
1394    for (auto i : *RegionIndices) {
1395      PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1396    }
1397  } else {
1398    unsigned NumInputs = getPHINumInputs(PHI);
1399    for (unsigned i = 0; i < NumInputs; ++i) {
1400      PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1401    }
1402  }
1403}
1404
1405unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1406    MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1407  unsigned DestReg = getPHIDestReg(PHI);
1408  Register LinearizeDestReg =
1409      MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1410  PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1411  storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1412  return LinearizeDestReg;
1413}
1414
1415void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1416  // We need to create a new chain for the killed phi, but there is no
1417  // need to do the renaming outside or inside the block.
1418  SmallPtrSet<MachineInstr *, 2> PHIs;
1419  for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
1420                                         E = MBB->instr_end();
1421       I != E; ++I) {
1422    MachineInstr &Instr = *I;
1423    if (Instr.isPHI()) {
1424      unsigned PHIDestReg = getPHIDestReg(Instr);
1425      LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
1426      LLVM_DEBUG(Instr.dump());
1427      PHIs.insert(&Instr);
1428      PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1429      storePHILinearizationInfoDest(PHIDestReg, Instr);
1430    }
1431  }
1432
1433  for (auto PI : PHIs) {
1434    PI->eraseFromParent();
1435  }
1436}
1437
1438static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1439                             unsigned Index) {
1440  for (auto i : PHIRegionIndices) {
1441    if (i == Index)
1442      return true;
1443  }
1444  return false;
1445}
1446
1447bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1448                                       SmallVector<unsigned, 2> &PHIIndices,
1449                                       unsigned *ReplaceReg) {
1450  return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1451}
1452
1453bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1454                                       unsigned CombinedSourceReg,
1455                                       MachineBasicBlock *SourceMBB,
1456                                       SmallVector<unsigned, 2> &PHIIndices,
1457                                       unsigned *ReplaceReg) {
1458  LLVM_DEBUG(dbgs() << "Shrink PHI: ");
1459  LLVM_DEBUG(PHI.dump());
1460  LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
1461                    << " = PHI(");
1462
1463  bool Replaced = false;
1464  unsigned NumInputs = getPHINumInputs(PHI);
1465  int SingleExternalEntryIndex = -1;
1466  for (unsigned i = 0; i < NumInputs; ++i) {
1467    if (!isPHIRegionIndex(PHIIndices, i)) {
1468      if (SingleExternalEntryIndex == -1) {
1469        // Single entry
1470        SingleExternalEntryIndex = i;
1471      } else {
1472        // Multiple entries
1473        SingleExternalEntryIndex = -2;
1474      }
1475    }
1476  }
1477
1478  if (SingleExternalEntryIndex > -1) {
1479    *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1480    // We should not rewrite the code, we should only pick up the single value
1481    // that represents the shrunk PHI.
1482    Replaced = true;
1483  } else {
1484    MachineBasicBlock *MBB = PHI.getParent();
1485    MachineInstrBuilder MIB =
1486        BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1487                getPHIDestReg(PHI));
1488    if (SourceMBB) {
1489      MIB.addReg(CombinedSourceReg);
1490      MIB.addMBB(SourceMBB);
1491      LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1492                        << printMBBReference(*SourceMBB));
1493    }
1494
1495    for (unsigned i = 0; i < NumInputs; ++i) {
1496      if (isPHIRegionIndex(PHIIndices, i)) {
1497        continue;
1498      }
1499      unsigned SourceReg = getPHISourceReg(PHI, i);
1500      MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1501      MIB.addReg(SourceReg);
1502      MIB.addMBB(SourcePred);
1503      LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1504                        << printMBBReference(*SourcePred));
1505    }
1506    LLVM_DEBUG(dbgs() << ")\n");
1507  }
1508  PHI.eraseFromParent();
1509  return Replaced;
1510}
1511
1512void AMDGPUMachineCFGStructurizer::replacePHI(
1513    MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1514    SmallVector<unsigned, 2> &PHIRegionIndices) {
1515  LLVM_DEBUG(dbgs() << "Replace PHI: ");
1516  LLVM_DEBUG(PHI.dump());
1517  LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
1518                    << " = PHI(");
1519
1520  bool HasExternalEdge = false;
1521  unsigned NumInputs = getPHINumInputs(PHI);
1522  for (unsigned i = 0; i < NumInputs; ++i) {
1523    if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1524      HasExternalEdge = true;
1525    }
1526  }
1527
1528  if (HasExternalEdge) {
1529    MachineBasicBlock *MBB = PHI.getParent();
1530    MachineInstrBuilder MIB =
1531        BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1532                getPHIDestReg(PHI));
1533    MIB.addReg(CombinedSourceReg);
1534    MIB.addMBB(LastMerge);
1535    LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1536                      << printMBBReference(*LastMerge));
1537    for (unsigned i = 0; i < NumInputs; ++i) {
1538      if (isPHIRegionIndex(PHIRegionIndices, i)) {
1539        continue;
1540      }
1541      unsigned SourceReg = getPHISourceReg(PHI, i);
1542      MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1543      MIB.addReg(SourceReg);
1544      MIB.addMBB(SourcePred);
1545      LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1546                        << printMBBReference(*SourcePred));
1547    }
1548    LLVM_DEBUG(dbgs() << ")\n");
1549  } else {
1550    replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1551  }
1552  PHI.eraseFromParent();
1553}
1554
1555void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1556    MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1557    SmallVector<unsigned, 2> &PHIRegionIndices) {
1558  LLVM_DEBUG(dbgs() << "Replace entry PHI: ");
1559  LLVM_DEBUG(PHI.dump());
1560  LLVM_DEBUG(dbgs() << " with ");
1561
1562  unsigned NumInputs = getPHINumInputs(PHI);
1563  unsigned NumNonRegionInputs = NumInputs;
1564  for (unsigned i = 0; i < NumInputs; ++i) {
1565    if (isPHIRegionIndex(PHIRegionIndices, i)) {
1566      NumNonRegionInputs--;
1567    }
1568  }
1569
1570  if (NumNonRegionInputs == 0) {
1571    auto DestReg = getPHIDestReg(PHI);
1572    replaceRegisterWith(DestReg, CombinedSourceReg);
1573    LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI)
1574                      << "\n");
1575    PHI.eraseFromParent();
1576  } else {
1577    LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
1578    MachineBasicBlock *MBB = PHI.getParent();
1579    MachineInstrBuilder MIB =
1580        BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1581                getPHIDestReg(PHI));
1582    MIB.addReg(CombinedSourceReg);
1583    MIB.addMBB(IfMBB);
1584    LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1585                      << printMBBReference(*IfMBB));
1586    unsigned NumInputs = getPHINumInputs(PHI);
1587    for (unsigned i = 0; i < NumInputs; ++i) {
1588      if (isPHIRegionIndex(PHIRegionIndices, i)) {
1589        continue;
1590      }
1591      unsigned SourceReg = getPHISourceReg(PHI, i);
1592      MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1593      MIB.addReg(SourceReg);
1594      MIB.addMBB(SourcePred);
1595      LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1596                        << printMBBReference(*SourcePred));
1597    }
1598    LLVM_DEBUG(dbgs() << ")\n");
1599    PHI.eraseFromParent();
1600  }
1601}
1602
1603void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1604    MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1605    unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1606  bool WasLiveOut = false;
1607  for (auto PII : PHIRegionIndices) {
1608    unsigned Reg = getPHISourceReg(PHI, PII);
1609    if (LRegion->isLiveOut(Reg)) {
1610      bool IsDead = true;
1611
1612      // Check if register is live out of the basic block
1613      MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1614      for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
1615        if ((*UI).getParent()->getParent() != DefMBB) {
1616          IsDead = false;
1617        }
1618      }
1619
1620      LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
1621                        << (IsDead ? "dead" : "alive")
1622                        << " after PHI replace\n");
1623      if (IsDead) {
1624        LRegion->removeLiveOut(Reg);
1625      }
1626      WasLiveOut = true;
1627    }
1628  }
1629
1630  if (WasLiveOut)
1631    LRegion->addLiveOut(CombinedSourceReg);
1632}
1633
1634void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1635                                                  MachineBasicBlock *LastMerge,
1636                                                  MachineInstr &PHI,
1637                                                  LinearizedRegion *LRegion) {
1638  SmallVector<unsigned, 2> PHIRegionIndices;
1639  getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1640  unsigned LinearizedSourceReg =
1641      storePHILinearizationInfo(PHI, &PHIRegionIndices);
1642
1643  replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1644  replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1645}
1646
1647void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1648                                                   MachineBasicBlock *IfMBB,
1649                                                   MachineInstr &PHI) {
1650  SmallVector<unsigned, 2> PHINonRegionIndices;
1651  getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1652  unsigned LinearizedSourceReg =
1653      storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1654  replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1655}
1656
1657static void collectPHIs(MachineBasicBlock *MBB,
1658                        SmallVector<MachineInstr *, 2> &PHIs) {
1659  for (auto &BBI : *MBB) {
1660    if (BBI.isPHI()) {
1661      PHIs.push_back(&BBI);
1662    }
1663  }
1664}
1665
1666void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1667                                                   MachineBasicBlock *LastMerge,
1668                                                   LinearizedRegion *LRegion) {
1669  SmallVector<MachineInstr *, 2> PHIs;
1670  auto Exit = Region->getSucc();
1671  if (Exit == nullptr)
1672    return;
1673
1674  collectPHIs(Exit, PHIs);
1675
1676  for (auto PHII : PHIs) {
1677    rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1678  }
1679}
1680
1681void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1682                                                    MachineBasicBlock *IfMBB) {
1683  SmallVector<MachineInstr *, 2> PHIs;
1684  auto Entry = Region->getEntry();
1685
1686  collectPHIs(Entry, PHIs);
1687
1688  for (auto PHII : PHIs) {
1689    rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1690  }
1691}
1692
1693void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1694                                                       MachineBasicBlock *Dest,
1695                                                       const DebugLoc &DL) {
1696  LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1697                    << " -> " << Dest->getNumber() << "\n");
1698  MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator();
1699  bool HasTerminator = Terminator != MBB->instr_end();
1700  if (HasTerminator) {
1701    TII->ReplaceTailWithBranchTo(Terminator, Dest);
1702  }
1703  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) {
1704    TII->insertUnconditionalBranch(*MBB, Dest, DL);
1705  }
1706}
1707
1708static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
1709  MachineBasicBlock *result = nullptr;
1710  for (auto &MFI : MF) {
1711    if (MFI.succ_size() == 0) {
1712      if (result == nullptr) {
1713        result = &MFI;
1714      } else {
1715        return nullptr;
1716      }
1717    }
1718  }
1719
1720  return result;
1721}
1722
1723static bool hasOneExitNode(MachineFunction &MF) {
1724  return getSingleExitNode(MF) != nullptr;
1725}
1726
1727MachineBasicBlock *
1728AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1729  auto Exit = Region->getSucc();
1730
1731  // If the exit is the end of the function, we just use the existing
1732  MachineFunction *MF = Region->getEntry()->getParent();
1733  if (Exit == nullptr && hasOneExitNode(*MF)) {
1734    return &(*(--(Region->getEntry()->getParent()->end())));
1735  }
1736
1737  MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1738  if (Exit == nullptr) {
1739    MachineFunction::iterator ExitIter = MF->end();
1740    MF->insert(ExitIter, LastMerge);
1741  } else {
1742    MachineFunction::iterator ExitIter = Exit->getIterator();
1743    MF->insert(ExitIter, LastMerge);
1744    LastMerge->addSuccessor(Exit);
1745    insertUnconditionalBranch(LastMerge, Exit);
1746    LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber()
1747                      << "\n");
1748  }
1749  return LastMerge;
1750}
1751
1752void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1753                                            MachineBasicBlock *CodeBB,
1754                                            MachineBasicBlock *MergeBB,
1755                                            unsigned DestRegister,
1756                                            unsigned IfSourceRegister,
1757                                            unsigned CodeSourceRegister,
1758                                            bool IsUndefIfSource) {
1759  // If this is the function exit block, we don't need a phi.
1760  if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1761    return;
1762  }
1763  LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
1764                    << "): " << printReg(DestRegister, TRI) << " = PHI("
1765                    << printReg(IfSourceRegister, TRI) << ", "
1766                    << printMBBReference(*IfBB)
1767                    << printReg(CodeSourceRegister, TRI) << ", "
1768                    << printMBBReference(*CodeBB) << ")\n");
1769  const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1770  MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1771                                    TII->get(TargetOpcode::PHI), DestRegister);
1772  if (IsUndefIfSource && false) {
1773    MIB.addReg(IfSourceRegister, RegState::Undef);
1774  } else {
1775    MIB.addReg(IfSourceRegister);
1776  }
1777  MIB.addMBB(IfBB);
1778  MIB.addReg(CodeSourceRegister);
1779  MIB.addMBB(CodeBB);
1780}
1781
1782static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
1783  for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
1784                                        E = MBB->succ_end();
1785       PI != E; ++PI) {
1786    if ((*PI) != MBB) {
1787      (MBB)->removeSuccessor(*PI);
1788    }
1789  }
1790}
1791
1792static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
1793                                   MachineBasicBlock *EndMBB) {
1794
1795  // We have to check against the StartMBB successor becasuse a
1796  // structurized region with a loop will have the entry block split,
1797  // and the backedge will go to the entry successor.
1798  DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
1799  unsigned SuccSize = StartMBB->succ_size();
1800  if (SuccSize > 0) {
1801    MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1802    for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
1803                                          E = EndMBB->succ_end();
1804         PI != E; ++PI) {
1805      // Either we have a back-edge to the entry block, or a back-edge to the
1806      // successor of the entry block since the block may be split.
1807      if ((*PI) != StartMBB &&
1808          !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1809        Succs.insert(
1810            std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
1811      }
1812    }
1813  }
1814
1815  for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
1816                                        E = StartMBB->pred_end();
1817       PI != E; ++PI) {
1818    if ((*PI) != EndMBB) {
1819      Succs.insert(
1820          std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
1821    }
1822  }
1823
1824  for (auto SI : Succs) {
1825    std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1826    LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
1827                      << " -> " << printMBBReference(*Edge.second) << "\n");
1828    Edge.first->removeSuccessor(Edge.second);
1829  }
1830}
1831
1832MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1833    MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1834    MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1835    bool InheritPreds) {
1836  MachineFunction *MF = MergeBB->getParent();
1837  MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
1838
1839  if (InheritPreds) {
1840    for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
1841                                          E = CodeBBStart->pred_end();
1842         PI != E; ++PI) {
1843      if ((*PI) != CodeBBEnd) {
1844        MachineBasicBlock *Pred = (*PI);
1845        Pred->addSuccessor(IfBB);
1846      }
1847    }
1848  }
1849
1850  removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1851
1852  auto CodeBBStartI = CodeBBStart->getIterator();
1853  auto CodeBBEndI = CodeBBEnd->getIterator();
1854  auto MergeIter = MergeBB->getIterator();
1855  MF->insert(MergeIter, IfBB);
1856  MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1857  IfBB->addSuccessor(MergeBB);
1858  IfBB->addSuccessor(CodeBBStart);
1859
1860  LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1861  // Ensure that the MergeBB is a successor of the CodeEndBB.
1862  if (!CodeBBEnd->isSuccessor(MergeBB))
1863    CodeBBEnd->addSuccessor(MergeBB);
1864
1865  LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart)
1866                    << " through " << printMBBReference(*CodeBBEnd) << "\n");
1867
1868  // If we have a single predecessor we can find a reasonable debug location
1869  MachineBasicBlock *SinglePred =
1870      CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1871  const DebugLoc &DL = SinglePred
1872                    ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1873                    : DebugLoc();
1874
1875  unsigned Reg =
1876      TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1877                    SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1878  if (&(*(IfBB->getParent()->begin())) == IfBB) {
1879    TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1880                              CodeBBStart->getNumber());
1881  }
1882  MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1883  ArrayRef<MachineOperand> Cond(RegOp);
1884  TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1885
1886  return IfBB;
1887}
1888
1889void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1890    SmallVector<MachineOperand, 1> Cond) {
1891  if (Cond.size() != 1)
1892    return;
1893  if (!Cond[0].isReg())
1894    return;
1895
1896  Register CondReg = Cond[0].getReg();
1897  for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
1898    (*UI).setIsKill(false);
1899  }
1900}
1901
1902void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1903                                                     MachineBasicBlock *MergeBB,
1904                                                     unsigned BBSelectReg) {
1905  MachineBasicBlock *TrueBB = nullptr;
1906  MachineBasicBlock *FalseBB = nullptr;
1907  SmallVector<MachineOperand, 1> Cond;
1908  MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
1909  TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
1910
1911  const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
1912
1913  if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
1914    // This is an exit block, hence no successors. We will assign the
1915    // bb select register to the entry block.
1916    TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1917                              BBSelectReg,
1918                              CodeBB->getParent()->begin()->getNumber());
1919    insertUnconditionalBranch(CodeBB, MergeBB, DL);
1920    return;
1921  }
1922
1923  if (FalseBB == nullptr && TrueBB == nullptr) {
1924    TrueBB = FallthroughBB;
1925  } else if (TrueBB != nullptr) {
1926    FalseBB =
1927        (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
1928  }
1929
1930  if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
1931    TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1932                              BBSelectReg, TrueBB->getNumber());
1933  } else {
1934    const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
1935    Register TrueBBReg = MRI->createVirtualRegister(RegClass);
1936    Register FalseBBReg = MRI->createVirtualRegister(RegClass);
1937    TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1938                              TrueBBReg, TrueBB->getNumber());
1939    TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1940                              FalseBBReg, FalseBB->getNumber());
1941    ensureCondIsNotKilled(Cond);
1942    TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
1943                            BBSelectReg, Cond, TrueBBReg, FalseBBReg);
1944  }
1945
1946  insertUnconditionalBranch(CodeBB, MergeBB, DL);
1947}
1948
1949MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
1950  if (MRI->def_begin(Reg) == MRI->def_end()) {
1951    LLVM_DEBUG(dbgs() << "Register "
1952                      << printReg(Reg, MRI->getTargetRegisterInfo())
1953                      << " has NO defs\n");
1954  } else if (!MRI->hasOneDef(Reg)) {
1955    LLVM_DEBUG(dbgs() << "Register "
1956                      << printReg(Reg, MRI->getTargetRegisterInfo())
1957                      << " has multiple defs\n");
1958    LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n");
1959    for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
1960      LLVM_DEBUG(DI->getParent()->dump());
1961    }
1962    LLVM_DEBUG(dbgs() << "DEFS END\n");
1963  }
1964
1965  assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1966  return (*(MRI->def_begin(Reg))).getParent();
1967}
1968
1969void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
1970                                              MachineBasicBlock *CodeBB,
1971                                              MachineBasicBlock *MergeBB,
1972                                              LinearizedRegion *InnerRegion,
1973                                              unsigned DestReg,
1974                                              unsigned SourceReg) {
1975  // In this function we know we are part of a chain already, so we need
1976  // to add the registers to the existing chain, and rename the register
1977  // inside the region.
1978  bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1979  MachineInstr *DefInstr = getDefInstr(SourceReg);
1980  if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
1981    // Handle the case where the def is a PHI-def inside a basic
1982    // block, then we only need to do renaming. Special care needs to
1983    // be taken if the PHI-def is part of an existing chain, or if a
1984    // new one needs to be created.
1985    InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
1986
1987    // We collect all PHI Information, and if we are at the region entry,
1988    // all PHIs will be removed, and then re-introduced if needed.
1989    storePHILinearizationInfoDest(DestReg, *DefInstr);
1990    // We have picked up all the information we need now and can remove
1991    // the PHI
1992    PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1993    DefInstr->eraseFromParent();
1994  } else {
1995    // If this is not a phi-def, or it is a phi-def but from a linearized region
1996    if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
1997      // If this is a single BB and the definition is in this block we
1998      // need to replace any uses outside the region.
1999      InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
2000    }
2001    const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
2002    Register NextDestReg = MRI->createVirtualRegister(RegClass);
2003    bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
2004    LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
2005    insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
2006                   SourceReg, IsLastDef);
2007
2008    PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
2009    if (IsLastDef) {
2010      const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
2011      TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
2012                                NextDestReg, 0);
2013      PHIInfo.deleteDef(DestReg);
2014    } else {
2015      PHIInfo.replaceDef(DestReg, NextDestReg);
2016    }
2017  }
2018}
2019
2020bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
2021                                         LinearizedRegion *InnerRegion,
2022                                         unsigned Register) {
2023  return getDefInstr(Register)->getParent() == MBB ||
2024         InnerRegion->contains(getDefInstr(Register)->getParent());
2025}
2026
2027void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
2028                                                MachineBasicBlock *CodeBB,
2029                                                MachineBasicBlock *MergeBB,
2030                                                LinearizedRegion *InnerRegion,
2031                                                LinearizedRegion *LRegion) {
2032  DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
2033  SmallVector<unsigned, 4> OldLiveOuts;
2034  bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2035  for (auto OLI : *LiveOuts) {
2036    OldLiveOuts.push_back(OLI);
2037  }
2038
2039  for (auto LI : OldLiveOuts) {
2040    LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
2041    if (!containsDef(CodeBB, InnerRegion, LI) ||
2042        (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
2043      // If the register simly lives through the CodeBB, we don't have
2044      // to rewrite anything since the register is not defined in this
2045      // part of the code.
2046      LLVM_DEBUG(dbgs() << "- through");
2047      continue;
2048    }
2049    LLVM_DEBUG(dbgs() << "\n");
2050    unsigned Reg = LI;
2051    if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2052      // If the register is live out, we do want to create a phi,
2053      // unless it is from the Exit block, becasuse in that case there
2054      // is already a PHI, and no need to create a new one.
2055
2056      // If the register is just a live out def and not part of a phi
2057      // chain, we need to create a PHI node to handle the if region,
2058      // and replace all uses outside of the region with the new dest
2059      // register, unless it is the outgoing BB select register. We have
2060      // already creaed phi nodes for these.
2061      const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2062      Register PHIDestReg = MRI->createVirtualRegister(RegClass);
2063      Register IfSourceReg = MRI->createVirtualRegister(RegClass);
2064      // Create initializer, this value is never used, but is needed
2065      // to satisfy SSA.
2066      LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
2067      TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2068                        IfSourceReg, 0);
2069
2070      InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2071      LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2072      insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2073                     IfSourceReg, Reg, true);
2074    }
2075  }
2076
2077  // Handle the chained definitions in PHIInfo, checking if this basic block
2078  // is a source block for a definition.
2079  SmallVector<unsigned, 4> Sources;
2080  if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2081    LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from "
2082                      << printMBBReference(*CodeBB) << "\n");
2083    for (auto SI : Sources) {
2084      unsigned DestReg;
2085      PHIInfo.findDest(SI, CodeBB, DestReg);
2086      insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2087    }
2088    LLVM_DEBUG(dbgs() << "Insertion done.\n");
2089  }
2090
2091  LLVM_DEBUG(PHIInfo.dump(MRI));
2092}
2093
2094void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2095  LLVM_DEBUG(dbgs() << "Before PHI Prune\n");
2096  LLVM_DEBUG(PHIInfo.dump(MRI));
2097  SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
2098      ElimiatedSources;
2099  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2100       ++DRI) {
2101
2102    unsigned DestReg = *DRI;
2103    auto SE = PHIInfo.sources_end(DestReg);
2104
2105    bool MBBContainsPHISource = false;
2106    // Check if there is a PHI source in this MBB
2107    for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2108      unsigned SourceReg = (*SRI).first;
2109      MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2110      if (Def->getParent()->getParent() == MBB) {
2111        MBBContainsPHISource = true;
2112      }
2113    }
2114
2115    // If so, all other sources are useless since we know this block
2116    // is always executed when the region is executed.
2117    if (MBBContainsPHISource) {
2118      for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2119        PHILinearize::PHISourceT Source = *SRI;
2120        unsigned SourceReg = Source.first;
2121        MachineBasicBlock *SourceMBB = Source.second;
2122        MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2123        if (Def->getParent()->getParent() != MBB) {
2124          ElimiatedSources.push_back(
2125              std::make_tuple(DestReg, SourceReg, SourceMBB));
2126        }
2127      }
2128    }
2129  }
2130
2131  // Remove the PHI sources that are in the given MBB
2132  for (auto &SourceInfo : ElimiatedSources) {
2133    PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2134                         std::get<2>(SourceInfo));
2135  }
2136  LLVM_DEBUG(dbgs() << "After PHI Prune\n");
2137  LLVM_DEBUG(PHIInfo.dump(MRI));
2138}
2139
2140void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2141                                            unsigned DestReg) {
2142  MachineBasicBlock *Entry = CurrentRegion->getEntry();
2143  MachineBasicBlock *Exit = CurrentRegion->getExit();
2144
2145  LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: "
2146                    << (*(Entry->pred_begin()))->getNumber() << "\n");
2147
2148  int NumSources = 0;
2149  auto SE = PHIInfo.sources_end(DestReg);
2150
2151  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2152    NumSources++;
2153  }
2154
2155  if (NumSources == 1) {
2156    auto SRI = PHIInfo.sources_begin(DestReg);
2157    unsigned SourceReg = (*SRI).first;
2158    replaceRegisterWith(DestReg, SourceReg);
2159  } else {
2160    const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2161    MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2162                                      TII->get(TargetOpcode::PHI), DestReg);
2163    LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
2164
2165    unsigned CurrentBackedgeReg = 0;
2166
2167    for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2168      unsigned SourceReg = (*SRI).first;
2169
2170      if (CurrentRegion->contains((*SRI).second)) {
2171        if (CurrentBackedgeReg == 0) {
2172          CurrentBackedgeReg = SourceReg;
2173        } else {
2174          MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2175          MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2176          const TargetRegisterClass *RegClass =
2177              MRI->getRegClass(CurrentBackedgeReg);
2178          Register NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2179          MachineInstrBuilder BackedgePHI =
2180              BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2181                      TII->get(TargetOpcode::PHI), NewBackedgeReg);
2182          BackedgePHI.addReg(CurrentBackedgeReg);
2183          BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2184          BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2185          BackedgePHI.addMBB((*SRI).second);
2186          CurrentBackedgeReg = NewBackedgeReg;
2187          LLVM_DEBUG(dbgs()
2188                     << "Inserting backedge PHI: "
2189                     << printReg(NewBackedgeReg, TRI) << " = PHI("
2190                     << printReg(CurrentBackedgeReg, TRI) << ", "
2191                     << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", "
2192                     << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", "
2193                     << printMBBReference(*(*SRI).second));
2194        }
2195      } else {
2196        MIB.addReg(SourceReg);
2197        MIB.addMBB((*SRI).second);
2198        LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
2199                          << printMBBReference(*(*SRI).second) << ", ");
2200      }
2201    }
2202
2203    // Add the final backedge register source to the entry phi
2204    if (CurrentBackedgeReg != 0) {
2205      MIB.addReg(CurrentBackedgeReg);
2206      MIB.addMBB(Exit);
2207      LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
2208                        << printMBBReference(*Exit) << ")\n");
2209    } else {
2210      LLVM_DEBUG(dbgs() << ")\n");
2211    }
2212  }
2213}
2214
2215void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2216  LLVM_DEBUG(PHIInfo.dump(MRI));
2217
2218  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2219       ++DRI) {
2220
2221    unsigned DestReg = *DRI;
2222    createEntryPHI(CurrentRegion, DestReg);
2223  }
2224  PHIInfo.clear();
2225}
2226
2227void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
2228                                                 unsigned NewRegister) {
2229  assert(Register != NewRegister && "Cannot replace a reg with itself");
2230
2231  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
2232                                         E = MRI->reg_end();
2233       I != E;) {
2234    MachineOperand &O = *I;
2235    ++I;
2236    if (Register::isPhysicalRegister(NewRegister)) {
2237      LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
2238                        << printReg(NewRegister, MRI->getTargetRegisterInfo())
2239                        << "\n");
2240      llvm_unreachable("Cannot substitute physical registers");
2241      // We don't handle physical registers, but if we need to
2242      // in the future This is how we do it:
2243      // O.substPhysReg(NewRegister, *TRI);
2244    } else {
2245      LLVM_DEBUG(dbgs() << "Replacing register: "
2246                        << printReg(Register, MRI->getTargetRegisterInfo())
2247                        << " with "
2248                        << printReg(NewRegister, MRI->getTargetRegisterInfo())
2249                        << "\n");
2250      O.setReg(NewRegister);
2251    }
2252  }
2253  PHIInfo.deleteDef(Register);
2254
2255  getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2256
2257  LLVM_DEBUG(PHIInfo.dump(MRI));
2258}
2259
2260void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2261  LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n");
2262  LLVM_DEBUG(PHIInfo.dump(MRI));
2263  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2264       ++DRI) {
2265    unsigned DestReg = *DRI;
2266    LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
2267    auto SRI = PHIInfo.sources_begin(DestReg);
2268    unsigned SourceReg = (*SRI).first;
2269    LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
2270                      << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
2271
2272    assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2273           "More than one phi source in entry node");
2274    replaceRegisterWith(DestReg, SourceReg);
2275  }
2276}
2277
2278static bool isFunctionEntryBlock(MachineBasicBlock *MBB) {
2279  return ((&(*(MBB->getParent()->begin()))) == MBB);
2280}
2281
2282MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2283    MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2284    LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2285    unsigned BBSelectRegOut) {
2286  if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2287    // Handle non-loop function entry block.
2288    // We need to allow loops to the entry block and then
2289    rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2290    resolvePHIInfos(CodeBB);
2291    removeExternalCFGSuccessors(CodeBB);
2292    CodeBB->addSuccessor(MergeBB);
2293    CurrentRegion->addMBB(CodeBB);
2294    return nullptr;
2295  }
2296  if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2297    // Handle non-loop region entry block.
2298    MachineFunction *MF = MergeBB->getParent();
2299    auto MergeIter = MergeBB->getIterator();
2300    auto CodeBBStartIter = CodeBB->getIterator();
2301    auto CodeBBEndIter = ++(CodeBB->getIterator());
2302    if (CodeBBEndIter != MergeIter) {
2303      MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2304    }
2305    rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2306    prunePHIInfo(CodeBB);
2307    createEntryPHIs(CurrentRegion);
2308    removeExternalCFGSuccessors(CodeBB);
2309    CodeBB->addSuccessor(MergeBB);
2310    CurrentRegion->addMBB(CodeBB);
2311    return nullptr;
2312  } else {
2313    // Handle internal block.
2314    const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2315    Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2316    rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2317    bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2318    MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2319                                            BBSelectRegIn, IsRegionEntryBB);
2320    CurrentRegion->addMBB(IfBB);
2321    // If this is the entry block we need to make the If block the new
2322    // linearized region entry.
2323    if (IsRegionEntryBB) {
2324      CurrentRegion->setEntry(IfBB);
2325
2326      if (CurrentRegion->getHasLoop()) {
2327        MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2328        MachineBasicBlock *ETrueBB = nullptr;
2329        MachineBasicBlock *EFalseBB = nullptr;
2330        SmallVector<MachineOperand, 1> ECond;
2331
2332        const DebugLoc &DL = DebugLoc();
2333        TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2334        TII->removeBranch(*RegionExit);
2335
2336        // We need to create a backedge if there is a loop
2337        unsigned Reg = TII->insertNE(
2338            RegionExit, RegionExit->instr_end(), DL,
2339            CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2340            CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2341        MachineOperand RegOp =
2342            MachineOperand::CreateReg(Reg, false, false, true);
2343        ArrayRef<MachineOperand> Cond(RegOp);
2344        LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2345        LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2346        LLVM_DEBUG(dbgs() << "\n");
2347        TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2348                          Cond, DebugLoc());
2349        RegionExit->addSuccessor(CurrentRegion->getEntry());
2350      }
2351    }
2352    CurrentRegion->addMBB(CodeBB);
2353    LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2354
2355    InnerRegion.setParent(CurrentRegion);
2356    LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2357    insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2358                   CodeBBSelectReg);
2359    InnerRegion.addMBB(MergeBB);
2360
2361    LLVM_DEBUG(InnerRegion.print(dbgs(), TRI));
2362    rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2363    extractKilledPHIs(CodeBB);
2364    if (IsRegionEntryBB) {
2365      createEntryPHIs(CurrentRegion);
2366    }
2367    return IfBB;
2368  }
2369}
2370
2371MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2372    MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2373    LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2374    unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2375  unsigned CodeBBSelectReg =
2376      InnerRegion->getRegionMRT()->getInnerOutputRegister();
2377  MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2378  MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2379  MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2380                                          SelectBB, BBSelectRegIn, true);
2381  CurrentRegion->addMBB(IfBB);
2382  bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2383  if (isEntry) {
2384
2385    if (CurrentRegion->getHasLoop()) {
2386      MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2387      MachineBasicBlock *ETrueBB = nullptr;
2388      MachineBasicBlock *EFalseBB = nullptr;
2389      SmallVector<MachineOperand, 1> ECond;
2390
2391      const DebugLoc &DL = DebugLoc();
2392      TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2393      TII->removeBranch(*RegionExit);
2394
2395      // We need to create a backedge if there is a loop
2396      unsigned Reg =
2397          TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2398                        CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2399                        CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2400      MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2401      ArrayRef<MachineOperand> Cond(RegOp);
2402      LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2403      LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2404      LLVM_DEBUG(dbgs() << "\n");
2405      TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2406                        Cond, DebugLoc());
2407      RegionExit->addSuccessor(IfBB);
2408    }
2409  }
2410  CurrentRegion->addMBBs(InnerRegion);
2411  LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2412  insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2413                 CodeBBSelectReg);
2414
2415  rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2416                     CurrentRegion);
2417
2418  rewriteRegionEntryPHIs(InnerRegion, IfBB);
2419
2420  if (isEntry) {
2421    CurrentRegion->setEntry(IfBB);
2422  }
2423
2424  if (isEntry) {
2425    createEntryPHIs(CurrentRegion);
2426  }
2427
2428  return IfBB;
2429}
2430
2431void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2432                                          MachineBasicBlock *Entry,
2433                                          MachineBasicBlock *EntrySucc,
2434                                          LinearizedRegion *LRegion) {
2435  SmallVector<unsigned, 2> PHIRegionIndices;
2436  getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2437
2438  assert(PHIRegionIndices.size() == 1);
2439
2440  unsigned RegionIndex = PHIRegionIndices[0];
2441  unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2442  MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2443  unsigned PHIDest = getPHIDestReg(PHI);
2444  unsigned PHISource = PHIDest;
2445  unsigned ReplaceReg;
2446
2447  if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2448    PHISource = ReplaceReg;
2449  }
2450
2451  const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2452  Register NewDestReg = MRI->createVirtualRegister(RegClass);
2453  LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2454  MachineInstrBuilder MIB =
2455      BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2456              TII->get(TargetOpcode::PHI), NewDestReg);
2457  LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
2458                    << " = PHI(");
2459  MIB.addReg(PHISource);
2460  MIB.addMBB(Entry);
2461  LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
2462                    << printMBBReference(*Entry));
2463  MIB.addReg(RegionSourceReg);
2464  MIB.addMBB(RegionSourceMBB);
2465  LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
2466                    << printMBBReference(*RegionSourceMBB) << ")\n");
2467}
2468
2469void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2470                                           MachineBasicBlock *EntrySucc,
2471                                           LinearizedRegion *LRegion) {
2472  SmallVector<MachineInstr *, 2> PHIs;
2473  collectPHIs(Entry, PHIs);
2474
2475  for (auto PHII : PHIs) {
2476    splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2477  }
2478}
2479
2480// Split the exit block so that we can insert a end control flow
2481MachineBasicBlock *
2482AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2483  auto MRTRegion = LRegion->getRegionMRT();
2484  auto Exit = LRegion->getExit();
2485  auto MF = Exit->getParent();
2486  auto Succ = MRTRegion->getSucc();
2487
2488  auto NewExit = MF->CreateMachineBasicBlock();
2489  auto AfterExitIter = Exit->getIterator();
2490  AfterExitIter++;
2491  MF->insert(AfterExitIter, NewExit);
2492  Exit->removeSuccessor(Succ);
2493  Exit->addSuccessor(NewExit);
2494  NewExit->addSuccessor(Succ);
2495  insertUnconditionalBranch(NewExit, Succ);
2496  LRegion->addMBB(NewExit);
2497  LRegion->setExit(NewExit);
2498
2499  LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber()
2500                    << "\n");
2501
2502  // Replace any PHI Predecessors in the successor with NewExit
2503  for (auto &II : *Succ) {
2504    MachineInstr &Instr = II;
2505
2506    // If we are past the PHI instructions we are done
2507    if (!Instr.isPHI())
2508      break;
2509
2510    int numPreds = getPHINumInputs(Instr);
2511    for (int i = 0; i < numPreds; ++i) {
2512      auto Pred = getPHIPred(Instr, i);
2513      if (Pred == Exit) {
2514        setPhiPred(Instr, i, NewExit);
2515      }
2516    }
2517  }
2518
2519  return NewExit;
2520}
2521
2522static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
2523  // Create the fall-through block.
2524  MachineBasicBlock *MBB = (*I).getParent();
2525  MachineFunction *MF = MBB->getParent();
2526  MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
2527  auto MBBIter = ++(MBB->getIterator());
2528  MF->insert(MBBIter, SuccMBB);
2529  SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
2530  MBB->addSuccessor(SuccMBB);
2531
2532  // Splice the code over.
2533  SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2534
2535  return SuccMBB;
2536}
2537
2538// Split the entry block separating PHI-nodes and the rest of the code
2539// This is needed to insert an initializer for the bb select register
2540// inloop regions.
2541
2542MachineBasicBlock *
2543AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2544  MachineBasicBlock *Entry = LRegion->getEntry();
2545  MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2546  MachineBasicBlock *Exit = LRegion->getExit();
2547
2548  LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
2549                    << printMBBReference(*Entry) << " -> "
2550                    << printMBBReference(*EntrySucc) << "\n");
2551  LRegion->addMBB(EntrySucc);
2552
2553  // Make the backedge go to Entry Succ
2554  if (Exit->isSuccessor(Entry)) {
2555    Exit->removeSuccessor(Entry);
2556  }
2557  Exit->addSuccessor(EntrySucc);
2558  MachineInstr &Branch = *(Exit->instr_rbegin());
2559  for (auto &UI : Branch.uses()) {
2560    if (UI.isMBB() && UI.getMBB() == Entry) {
2561      UI.setMBB(EntrySucc);
2562    }
2563  }
2564
2565  splitLoopPHIs(Entry, EntrySucc, LRegion);
2566
2567  return EntrySucc;
2568}
2569
2570LinearizedRegion *
2571AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2572  LinearizedRegion *LRegion = Region->getLinearizedRegion();
2573  LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2574  LRegion->setEntry(Region->getEntry());
2575  return LRegion;
2576}
2577
2578static void removeOldExitPreds(RegionMRT *Region) {
2579  MachineBasicBlock *Exit = Region->getSucc();
2580  if (Exit == nullptr) {
2581    return;
2582  }
2583  for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
2584                                        E = Exit->pred_end();
2585       PI != E; ++PI) {
2586    if (Region->contains(*PI)) {
2587      (*PI)->removeSuccessor(Exit);
2588    }
2589  }
2590}
2591
2592static bool mbbHasBackEdge(MachineBasicBlock *MBB,
2593                           SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2594  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2595    if (MBBs.count(*SI) != 0) {
2596      return true;
2597    }
2598  }
2599  return false;
2600}
2601
2602static bool containsNewBackedge(MRT *Tree,
2603                                SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2604  // Need to traverse this in reverse since it is in post order.
2605  if (Tree == nullptr)
2606    return false;
2607
2608  if (Tree->isMBB()) {
2609    MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2610    MBBs.insert(MBB);
2611    if (mbbHasBackEdge(MBB, MBBs)) {
2612      return true;
2613    }
2614  } else {
2615    RegionMRT *Region = Tree->getRegionMRT();
2616    SetVector<MRT *> *Children = Region->getChildren();
2617    for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
2618      if (containsNewBackedge(*CI, MBBs))
2619        return true;
2620    }
2621  }
2622  return false;
2623}
2624
2625static bool containsNewBackedge(RegionMRT *Region) {
2626  SmallPtrSet<MachineBasicBlock *, 8> MBBs;
2627  return containsNewBackedge(Region, MBBs);
2628}
2629
2630bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2631  auto *LRegion = initLinearizedRegion(Region);
2632  LRegion->setHasLoop(containsNewBackedge(Region));
2633  MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2634  MachineBasicBlock *CurrentMerge = LastMerge;
2635  LRegion->addMBB(LastMerge);
2636  LRegion->setExit(LastMerge);
2637
2638  rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2639  removeOldExitPreds(Region);
2640
2641  LLVM_DEBUG(PHIInfo.dump(MRI));
2642
2643  SetVector<MRT *> *Children = Region->getChildren();
2644  LLVM_DEBUG(dbgs() << "===========If Region Start===============\n");
2645  if (LRegion->getHasLoop()) {
2646    LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n");
2647  } else {
2648    LLVM_DEBUG(dbgs() << "Has Backedge: No\n");
2649  }
2650
2651  unsigned BBSelectRegIn;
2652  unsigned BBSelectRegOut;
2653  for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2654    LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
2655    LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2656
2657    auto CNI = CI;
2658    ++CNI;
2659
2660    MRT *Child = (*CI);
2661
2662    if (Child->isRegion()) {
2663
2664      LinearizedRegion *InnerLRegion =
2665          Child->getRegionMRT()->getLinearizedRegion();
2666      // We found the block is the exit of an inner region, we need
2667      // to put it in the current linearized region.
2668
2669      LLVM_DEBUG(dbgs() << "Linearizing region: ");
2670      LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI));
2671      LLVM_DEBUG(dbgs() << "\n");
2672
2673      MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2674      if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2675        // Entry has already been linearized, no need to do this region.
2676        unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2677        unsigned InnerSelectReg =
2678            InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2679        replaceRegisterWith(InnerSelectReg, OuterSelect),
2680            resolvePHIInfos(InnerEntry);
2681        if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2682          InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2683        continue;
2684      }
2685
2686      BBSelectRegOut = Child->getBBSelectRegOut();
2687      BBSelectRegIn = Child->getBBSelectRegIn();
2688
2689      LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2690                        << "\n");
2691      LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2692                        << "\n");
2693
2694      MachineBasicBlock *IfEnd = CurrentMerge;
2695      CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2696                                    Child->getRegionMRT()->getEntry(),
2697                                    BBSelectRegIn, BBSelectRegOut);
2698      TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2699    } else {
2700      MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2701      LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2702
2703      if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2704        // If this is the exit block then we need to skip to the next.
2705        // The "in" register will be transferred to "out" in the next
2706        // iteration.
2707        continue;
2708      }
2709
2710      BBSelectRegOut = Child->getBBSelectRegOut();
2711      BBSelectRegIn = Child->getBBSelectRegIn();
2712
2713      LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2714                        << "\n");
2715      LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2716                        << "\n");
2717
2718      MachineBasicBlock *IfEnd = CurrentMerge;
2719      // This is a basic block that is not part of an inner region, we
2720      // need to put it in the current linearized region.
2721      CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2722                                    BBSelectRegOut);
2723      if (CurrentMerge) {
2724        TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2725      }
2726
2727      LLVM_DEBUG(PHIInfo.dump(MRI));
2728    }
2729  }
2730
2731  LRegion->removeFalseRegisterKills(MRI);
2732
2733  if (LRegion->getHasLoop()) {
2734    MachineBasicBlock *NewSucc = splitEntry(LRegion);
2735    if (isFunctionEntryBlock(LRegion->getEntry())) {
2736      resolvePHIInfos(LRegion->getEntry());
2737    }
2738    const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2739    unsigned InReg = LRegion->getBBSelectRegIn();
2740    Register InnerSelectReg =
2741        MRI->createVirtualRegister(MRI->getRegClass(InReg));
2742    Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2743    TII->materializeImmediate(*(LRegion->getEntry()),
2744                              LRegion->getEntry()->getFirstTerminator(), DL,
2745                              NewInReg, Region->getEntry()->getNumber());
2746    // Need to be careful about updating the registers inside the region.
2747    LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2748    LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2749    insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2750                   InnerSelectReg, NewInReg,
2751                   LRegion->getRegionMRT()->getInnerOutputRegister());
2752    splitExit(LRegion);
2753    TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2754  }
2755
2756  if (Region->isRoot()) {
2757    TII->insertReturn(*LastMerge);
2758  }
2759
2760  LLVM_DEBUG(Region->getEntry()->getParent()->dump());
2761  LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2762  LLVM_DEBUG(PHIInfo.dump(MRI));
2763
2764  LLVM_DEBUG(dbgs() << "===========If Region End===============\n");
2765
2766  Region->setLinearizedRegion(LRegion);
2767  return true;
2768}
2769
2770bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2771  if (false && regionIsSimpleIf(Region)) {
2772    transformSimpleIfRegion(Region);
2773    return true;
2774  } else if (regionIsSequence(Region)) {
2775    fixupRegionExits(Region);
2776    return false;
2777  } else {
2778    structurizeComplexRegion(Region);
2779  }
2780  return false;
2781}
2782
2783static int structurize_once = 0;
2784
2785bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2786                                                bool isTopRegion) {
2787  bool Changed = false;
2788
2789  auto Children = Region->getChildren();
2790  for (auto CI : *Children) {
2791    if (CI->isRegion()) {
2792      Changed |= structurizeRegions(CI->getRegionMRT(), false);
2793    }
2794  }
2795
2796  if (structurize_once < 2 || true) {
2797    Changed |= structurizeRegion(Region);
2798    structurize_once++;
2799  }
2800  return Changed;
2801}
2802
2803void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2804  LLVM_DEBUG(dbgs() << "Fallthrough Map:\n");
2805  for (auto &MBBI : MF) {
2806    MachineBasicBlock *MBB = MBBI.getFallThrough();
2807    if (MBB != nullptr) {
2808      LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2809                        << MBB->getNumber() << "\n");
2810    }
2811    FallthroughMap[&MBBI] = MBB;
2812  }
2813}
2814
2815void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2816                                                    unsigned SelectOut) {
2817  LinearizedRegion *LRegion = new LinearizedRegion();
2818  if (SelectOut) {
2819    LRegion->addLiveOut(SelectOut);
2820    LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
2821                      << "\n");
2822  }
2823  LRegion->setRegionMRT(Region);
2824  Region->setLinearizedRegion(LRegion);
2825  LRegion->setParent(Region->getParent()
2826                         ? Region->getParent()->getLinearizedRegion()
2827                         : nullptr);
2828}
2829
2830unsigned
2831AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2832                                                  MachineRegisterInfo *MRI,
2833                                                  const SIInstrInfo *TII) {
2834  if (MRT->isRegion()) {
2835    RegionMRT *Region = MRT->getRegionMRT();
2836    Region->setBBSelectRegOut(SelectOut);
2837    unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2838
2839    // Fixme: Move linearization creation to the original spot
2840    createLinearizedRegion(Region, SelectOut);
2841
2842    for (auto CI = Region->getChildren()->begin(),
2843              CE = Region->getChildren()->end();
2844         CI != CE; ++CI) {
2845      InnerSelectOut =
2846          initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
2847    }
2848    MRT->setBBSelectRegIn(InnerSelectOut);
2849    return InnerSelectOut;
2850  } else {
2851    MRT->setBBSelectRegOut(SelectOut);
2852    unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2853    MRT->setBBSelectRegIn(NewSelectIn);
2854    return NewSelectIn;
2855  }
2856}
2857
2858static void checkRegOnlyPHIInputs(MachineFunction &MF) {
2859  for (auto &MBBI : MF) {
2860    for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(),
2861                                           E = MBBI.instr_end();
2862         I != E; ++I) {
2863      MachineInstr &Instr = *I;
2864      if (Instr.isPHI()) {
2865        int numPreds = getPHINumInputs(Instr);
2866        for (int i = 0; i < numPreds; ++i) {
2867          assert(Instr.getOperand(i * 2 + 1).isReg() &&
2868                 "PHI Operand not a register");
2869        }
2870      }
2871    }
2872  }
2873}
2874
2875bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2876  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2877  const SIInstrInfo *TII = ST.getInstrInfo();
2878  TRI = ST.getRegisterInfo();
2879  MRI = &(MF.getRegInfo());
2880  initFallthroughMap(MF);
2881
2882  checkRegOnlyPHIInputs(MF);
2883  LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n");
2884  LLVM_DEBUG(MF.dump());
2885
2886  Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
2887  LLVM_DEBUG(Regions->dump());
2888
2889  RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
2890  setRegionMRT(RTree);
2891  initializeSelectRegisters(RTree, 0, MRI, TII);
2892  LLVM_DEBUG(RTree->dump(TRI));
2893  bool result = structurizeRegions(RTree, true);
2894  delete RTree;
2895  LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n");
2896  initFallthroughMap(MF);
2897  return result;
2898}
2899
2900char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
2901
2902INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2903                      "AMDGPU Machine CFG Structurizer", false, false)
2904INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
2905INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2906                    "AMDGPU Machine CFG Structurizer", false, false)
2907
2908FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
2909  return new AMDGPUMachineCFGStructurizer();
2910}
2911