1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/StructurizeCFG.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/MapVector.h"
12#include "llvm/ADT/SCCIterator.h"
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Analysis/InstructionSimplify.h"
18#include "llvm/Analysis/RegionInfo.h"
19#include "llvm/Analysis/RegionIterator.h"
20#include "llvm/Analysis/RegionPass.h"
21#include "llvm/Analysis/UniformityAnalysis.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/CFG.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/Metadata.h"
31#include "llvm/IR/PassManager.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Use.h"
35#include "llvm/IR/Value.h"
36#include "llvm/IR/ValueHandle.h"
37#include "llvm/InitializePasses.h"
38#include "llvm/Pass.h"
39#include "llvm/Support/Casting.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/Transforms/Scalar.h"
44#include "llvm/Transforms/Utils.h"
45#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46#include "llvm/Transforms/Utils/Local.h"
47#include "llvm/Transforms/Utils/SSAUpdater.h"
48#include <algorithm>
49#include <cassert>
50#include <utility>
51
52using namespace llvm;
53using namespace llvm::PatternMatch;
54
55#define DEBUG_TYPE "structurizecfg"
56
57// The name for newly created blocks.
58const char FlowBlockName[] = "Flow";
59
60namespace {
61
62static cl::opt<bool> ForceSkipUniformRegions(
63  "structurizecfg-skip-uniform-regions",
64  cl::Hidden,
65  cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
66  cl::init(false));
67
68static cl::opt<bool>
69    RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
70                          cl::desc("Allow relaxed uniform region checks"),
71                          cl::init(true));
72
73// Definition of the complex types used in this pass.
74
75using BBValuePair = std::pair<BasicBlock *, Value *>;
76
77using RNVector = SmallVector<RegionNode *, 8>;
78using BBVector = SmallVector<BasicBlock *, 8>;
79using BranchVector = SmallVector<BranchInst *, 8>;
80using BBValueVector = SmallVector<BBValuePair, 2>;
81
82using BBSet = SmallPtrSet<BasicBlock *, 8>;
83
84using PhiMap = MapVector<PHINode *, BBValueVector>;
85using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
86
87using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
88using BBPredicates = DenseMap<BasicBlock *, Value *>;
89using PredMap = DenseMap<BasicBlock *, BBPredicates>;
90using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
91
92using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
93
94// A traits type that is intended to be used in graph algorithms. The graph
95// traits starts at an entry node, and traverses the RegionNodes that are in
96// the Nodes set.
97struct SubGraphTraits {
98  using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
99  using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
100
101  // This wraps a set of Nodes into the iterator, so we know which edges to
102  // filter out.
103  class WrappedSuccIterator
104      : public iterator_adaptor_base<
105            WrappedSuccIterator, BaseSuccIterator,
106            typename std::iterator_traits<BaseSuccIterator>::iterator_category,
107            NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
108    SmallDenseSet<RegionNode *> *Nodes;
109
110  public:
111    WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
112        : iterator_adaptor_base(It), Nodes(Nodes) {}
113
114    NodeRef operator*() const { return {*I, Nodes}; }
115  };
116
117  static bool filterAll(const NodeRef &N) { return true; }
118  static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
119
120  using ChildIteratorType =
121      filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
122
123  static NodeRef getEntryNode(Region *R) {
124    return {GraphTraits<Region *>::getEntryNode(R), nullptr};
125  }
126
127  static NodeRef getEntryNode(NodeRef N) { return N; }
128
129  static iterator_range<ChildIteratorType> children(const NodeRef &N) {
130    auto *filter = N.second ? &filterSet : &filterAll;
131    return make_filter_range(
132        make_range<WrappedSuccIterator>(
133            {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
134            {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
135        filter);
136  }
137
138  static ChildIteratorType child_begin(const NodeRef &N) {
139    return children(N).begin();
140  }
141
142  static ChildIteratorType child_end(const NodeRef &N) {
143    return children(N).end();
144  }
145};
146
147/// Finds the nearest common dominator of a set of BasicBlocks.
148///
149/// For every BB you add to the set, you can specify whether we "remember" the
150/// block.  When you get the common dominator, you can also ask whether it's one
151/// of the blocks we remembered.
152class NearestCommonDominator {
153  DominatorTree *DT;
154  BasicBlock *Result = nullptr;
155  bool ResultIsRemembered = false;
156
157  /// Add BB to the resulting dominator.
158  void addBlock(BasicBlock *BB, bool Remember) {
159    if (!Result) {
160      Result = BB;
161      ResultIsRemembered = Remember;
162      return;
163    }
164
165    BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
166    if (NewResult != Result)
167      ResultIsRemembered = false;
168    if (NewResult == BB)
169      ResultIsRemembered |= Remember;
170    Result = NewResult;
171  }
172
173public:
174  explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
175
176  void addBlock(BasicBlock *BB) {
177    addBlock(BB, /* Remember = */ false);
178  }
179
180  void addAndRememberBlock(BasicBlock *BB) {
181    addBlock(BB, /* Remember = */ true);
182  }
183
184  /// Get the nearest common dominator of all the BBs added via addBlock() and
185  /// addAndRememberBlock().
186  BasicBlock *result() { return Result; }
187
188  /// Is the BB returned by getResult() one of the blocks we added to the set
189  /// with addAndRememberBlock()?
190  bool resultIsRememberedBlock() { return ResultIsRemembered; }
191};
192
193/// Transforms the control flow graph on one single entry/exit region
194/// at a time.
195///
196/// After the transform all "If"/"Then"/"Else" style control flow looks like
197/// this:
198///
199/// \verbatim
200/// 1
201/// ||
202/// | |
203/// 2 |
204/// | /
205/// |/
206/// 3
207/// ||   Where:
208/// | |  1 = "If" block, calculates the condition
209/// 4 |  2 = "Then" subregion, runs if the condition is true
210/// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
211/// |/   4 = "Else" optional subregion, runs if the condition is false
212/// 5    5 = "End" block, also rejoins the control flow
213/// \endverbatim
214///
215/// Control flow is expressed as a branch where the true exit goes into the
216/// "Then"/"Else" region, while the false exit skips the region
217/// The condition for the optional "Else" region is expressed as a PHI node.
218/// The incoming values of the PHI node are true for the "If" edge and false
219/// for the "Then" edge.
220///
221/// Additionally to that even complicated loops look like this:
222///
223/// \verbatim
224/// 1
225/// ||
226/// | |
227/// 2 ^  Where:
228/// | /  1 = "Entry" block
229/// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
230/// 3    3 = "Flow" block, with back edge to entry block
231/// |
232/// \endverbatim
233///
234/// The back edge of the "Flow" block is always on the false side of the branch
235/// while the true side continues the general flow. So the loop condition
236/// consist of a network of PHI nodes where the true incoming values expresses
237/// breaks and the false values expresses continue states.
238
239class StructurizeCFG {
240  Type *Boolean;
241  ConstantInt *BoolTrue;
242  ConstantInt *BoolFalse;
243  Value *BoolPoison;
244
245  Function *Func;
246  Region *ParentRegion;
247
248  UniformityInfo *UA = nullptr;
249  DominatorTree *DT;
250
251  SmallVector<RegionNode *, 8> Order;
252  BBSet Visited;
253  BBSet FlowSet;
254
255  SmallVector<WeakVH, 8> AffectedPhis;
256  BBPhiMap DeletedPhis;
257  BB2BBVecMap AddedPhis;
258
259  PredMap Predicates;
260  BranchVector Conditions;
261
262  BB2BBMap Loops;
263  PredMap LoopPreds;
264  BranchVector LoopConds;
265
266  BranchDebugLocMap TermDL;
267
268  RegionNode *PrevNode;
269
270  void orderNodes();
271
272  void analyzeLoops(RegionNode *N);
273
274  Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
275
276  void gatherPredicates(RegionNode *N);
277
278  void collectInfos();
279
280  void insertConditions(bool Loops);
281
282  void simplifyConditions();
283
284  void delPhiValues(BasicBlock *From, BasicBlock *To);
285
286  void addPhiValues(BasicBlock *From, BasicBlock *To);
287
288  void findUndefBlocks(BasicBlock *PHIBlock,
289                       const SmallSet<BasicBlock *, 8> &Incomings,
290                       SmallVector<BasicBlock *> &UndefBlks) const;
291  void setPhiValues();
292
293  void simplifyAffectedPhis();
294
295  void killTerminator(BasicBlock *BB);
296
297  void changeExit(RegionNode *Node, BasicBlock *NewExit,
298                  bool IncludeDominator);
299
300  BasicBlock *getNextFlow(BasicBlock *Dominator);
301
302  BasicBlock *needPrefix(bool NeedEmpty);
303
304  BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
305
306  void setPrevNode(BasicBlock *BB);
307
308  bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
309
310  bool isPredictableTrue(RegionNode *Node);
311
312  void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
313
314  void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
315
316  void createFlow();
317
318  void rebuildSSA();
319
320public:
321  void init(Region *R);
322  bool run(Region *R, DominatorTree *DT);
323  bool makeUniformRegion(Region *R, UniformityInfo &UA);
324};
325
326class StructurizeCFGLegacyPass : public RegionPass {
327  bool SkipUniformRegions;
328
329public:
330  static char ID;
331
332  explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
333      : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
334    if (ForceSkipUniformRegions.getNumOccurrences())
335      SkipUniformRegions = ForceSkipUniformRegions.getValue();
336    initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
337  }
338
339  bool runOnRegion(Region *R, RGPassManager &RGM) override {
340    StructurizeCFG SCFG;
341    SCFG.init(R);
342    if (SkipUniformRegions) {
343      UniformityInfo &UA =
344          getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
345      if (SCFG.makeUniformRegion(R, UA))
346        return false;
347    }
348    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
349    return SCFG.run(R, DT);
350  }
351
352  StringRef getPassName() const override { return "Structurize control flow"; }
353
354  void getAnalysisUsage(AnalysisUsage &AU) const override {
355    if (SkipUniformRegions)
356      AU.addRequired<UniformityInfoWrapperPass>();
357    AU.addRequired<DominatorTreeWrapperPass>();
358
359    AU.addPreserved<DominatorTreeWrapperPass>();
360    RegionPass::getAnalysisUsage(AU);
361  }
362};
363
364} // end anonymous namespace
365
366char StructurizeCFGLegacyPass::ID = 0;
367
368INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
369                      "Structurize the CFG", false, false)
370INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
371INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
372INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
373INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
374                    "Structurize the CFG", false, false)
375
376/// Build up the general order of nodes, by performing a topological sort of the
377/// parent region's nodes, while ensuring that there is no outer cycle node
378/// between any two inner cycle nodes.
379void StructurizeCFG::orderNodes() {
380  Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
381                             GraphTraits<Region *>::nodes_end(ParentRegion)));
382  if (Order.empty())
383    return;
384
385  SmallDenseSet<RegionNode *> Nodes;
386  auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
387
388  // A list of range indices of SCCs in Order, to be processed.
389  SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
390  unsigned I = 0, E = Order.size();
391  while (true) {
392    // Run through all the SCCs in the subgraph starting with Entry.
393    for (auto SCCI =
394             scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
395                 EntryNode);
396         !SCCI.isAtEnd(); ++SCCI) {
397      auto &SCC = *SCCI;
398
399      // An SCC up to the size of 2, can be reduced to an entry (the last node),
400      // and a possible additional node. Therefore, it is already in order, and
401      // there is no need to add it to the work-list.
402      unsigned Size = SCC.size();
403      if (Size > 2)
404        WorkList.emplace_back(I, I + Size);
405
406      // Add the SCC nodes to the Order array.
407      for (const auto &N : SCC) {
408        assert(I < E && "SCC size mismatch!");
409        Order[I++] = N.first;
410      }
411    }
412    assert(I == E && "SCC size mismatch!");
413
414    // If there are no more SCCs to order, then we are done.
415    if (WorkList.empty())
416      break;
417
418    std::tie(I, E) = WorkList.pop_back_val();
419
420    // Collect the set of nodes in the SCC's subgraph. These are only the
421    // possible child nodes; we do not add the entry (last node) otherwise we
422    // will have the same exact SCC all over again.
423    Nodes.clear();
424    Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
425
426    // Update the entry node.
427    EntryNode.first = Order[E - 1];
428    EntryNode.second = &Nodes;
429  }
430}
431
432/// Determine the end of the loops
433void StructurizeCFG::analyzeLoops(RegionNode *N) {
434  if (N->isSubRegion()) {
435    // Test for exit as back edge
436    BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
437    if (Visited.count(Exit))
438      Loops[Exit] = N->getEntry();
439
440  } else {
441    // Test for successors as back edge
442    BasicBlock *BB = N->getNodeAs<BasicBlock>();
443    BranchInst *Term = cast<BranchInst>(BB->getTerminator());
444
445    for (BasicBlock *Succ : Term->successors())
446      if (Visited.count(Succ))
447        Loops[Succ] = BB;
448  }
449}
450
451/// Build the condition for one edge
452Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
453                                      bool Invert) {
454  Value *Cond = Invert ? BoolFalse : BoolTrue;
455  if (Term->isConditional()) {
456    Cond = Term->getCondition();
457
458    if (Idx != (unsigned)Invert)
459      Cond = invertCondition(Cond);
460  }
461  return Cond;
462}
463
464/// Analyze the predecessors of each block and build up predicates
465void StructurizeCFG::gatherPredicates(RegionNode *N) {
466  RegionInfo *RI = ParentRegion->getRegionInfo();
467  BasicBlock *BB = N->getEntry();
468  BBPredicates &Pred = Predicates[BB];
469  BBPredicates &LPred = LoopPreds[BB];
470
471  for (BasicBlock *P : predecessors(BB)) {
472    // Ignore it if it's a branch from outside into our region entry
473    if (!ParentRegion->contains(P))
474      continue;
475
476    Region *R = RI->getRegionFor(P);
477    if (R == ParentRegion) {
478      // It's a top level block in our region
479      BranchInst *Term = cast<BranchInst>(P->getTerminator());
480      for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
481        BasicBlock *Succ = Term->getSuccessor(i);
482        if (Succ != BB)
483          continue;
484
485        if (Visited.count(P)) {
486          // Normal forward edge
487          if (Term->isConditional()) {
488            // Try to treat it like an ELSE block
489            BasicBlock *Other = Term->getSuccessor(!i);
490            if (Visited.count(Other) && !Loops.count(Other) &&
491                !Pred.count(Other) && !Pred.count(P)) {
492
493              Pred[Other] = BoolFalse;
494              Pred[P] = BoolTrue;
495              continue;
496            }
497          }
498          Pred[P] = buildCondition(Term, i, false);
499        } else {
500          // Back edge
501          LPred[P] = buildCondition(Term, i, true);
502        }
503      }
504    } else {
505      // It's an exit from a sub region
506      while (R->getParent() != ParentRegion)
507        R = R->getParent();
508
509      // Edge from inside a subregion to its entry, ignore it
510      if (*R == *N)
511        continue;
512
513      BasicBlock *Entry = R->getEntry();
514      if (Visited.count(Entry))
515        Pred[Entry] = BoolTrue;
516      else
517        LPred[Entry] = BoolFalse;
518    }
519  }
520}
521
522/// Collect various loop and predicate infos
523void StructurizeCFG::collectInfos() {
524  // Reset predicate
525  Predicates.clear();
526
527  // and loop infos
528  Loops.clear();
529  LoopPreds.clear();
530
531  // Reset the visited nodes
532  Visited.clear();
533
534  for (RegionNode *RN : reverse(Order)) {
535    LLVM_DEBUG(dbgs() << "Visiting: "
536                      << (RN->isSubRegion() ? "SubRegion with entry: " : "")
537                      << RN->getEntry()->getName() << "\n");
538
539    // Analyze all the conditions leading to a node
540    gatherPredicates(RN);
541
542    // Remember that we've seen this node
543    Visited.insert(RN->getEntry());
544
545    // Find the last back edges
546    analyzeLoops(RN);
547  }
548
549  // Reset the collected term debug locations
550  TermDL.clear();
551
552  for (BasicBlock &BB : *Func) {
553    if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
554      TermDL[&BB] = DL;
555  }
556}
557
558/// Insert the missing branch conditions
559void StructurizeCFG::insertConditions(bool Loops) {
560  BranchVector &Conds = Loops ? LoopConds : Conditions;
561  Value *Default = Loops ? BoolTrue : BoolFalse;
562  SSAUpdater PhiInserter;
563
564  for (BranchInst *Term : Conds) {
565    assert(Term->isConditional());
566
567    BasicBlock *Parent = Term->getParent();
568    BasicBlock *SuccTrue = Term->getSuccessor(0);
569    BasicBlock *SuccFalse = Term->getSuccessor(1);
570
571    PhiInserter.Initialize(Boolean, "");
572    PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
573    PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
574
575    BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
576
577    NearestCommonDominator Dominator(DT);
578    Dominator.addBlock(Parent);
579
580    Value *ParentValue = nullptr;
581    for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
582      BasicBlock *BB = BBAndPred.first;
583      Value *Pred = BBAndPred.second;
584
585      if (BB == Parent) {
586        ParentValue = Pred;
587        break;
588      }
589      PhiInserter.AddAvailableValue(BB, Pred);
590      Dominator.addAndRememberBlock(BB);
591    }
592
593    if (ParentValue) {
594      Term->setCondition(ParentValue);
595    } else {
596      if (!Dominator.resultIsRememberedBlock())
597        PhiInserter.AddAvailableValue(Dominator.result(), Default);
598
599      Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
600    }
601  }
602}
603
604/// Simplify any inverted conditions that were built by buildConditions.
605void StructurizeCFG::simplifyConditions() {
606  SmallVector<Instruction *> InstToErase;
607  for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
608    auto &Preds = I.second;
609    for (auto &J : Preds) {
610      auto &Cond = J.second;
611      Instruction *Inverted;
612      if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
613          !Cond->use_empty()) {
614        if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
615          InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
616          Cond->replaceAllUsesWith(InvertedCmp);
617          InstToErase.push_back(cast<Instruction>(Cond));
618        }
619      }
620    }
621  }
622  for (auto *I : InstToErase)
623    I->eraseFromParent();
624}
625
626/// Remove all PHI values coming from "From" into "To" and remember
627/// them in DeletedPhis
628void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
629  PhiMap &Map = DeletedPhis[To];
630  for (PHINode &Phi : To->phis()) {
631    bool Recorded = false;
632    while (Phi.getBasicBlockIndex(From) != -1) {
633      Value *Deleted = Phi.removeIncomingValue(From, false);
634      Map[&Phi].push_back(std::make_pair(From, Deleted));
635      if (!Recorded) {
636        AffectedPhis.push_back(&Phi);
637        Recorded = true;
638      }
639    }
640  }
641}
642
643/// Add a dummy PHI value as soon as we knew the new predecessor
644void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
645  for (PHINode &Phi : To->phis()) {
646    Value *Undef = UndefValue::get(Phi.getType());
647    Phi.addIncoming(Undef, From);
648  }
649  AddedPhis[To].push_back(From);
650}
651
652/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
653/// from predecessors \p Incomings, we have a chance to mark the available value
654/// from some blocks as undefined. The function will find out all such blocks
655/// and return in \p UndefBlks.
656void StructurizeCFG::findUndefBlocks(
657    BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
658    SmallVector<BasicBlock *> &UndefBlks) const {
659  //  We may get a post-structured CFG like below:
660  //
661  //  | P1
662  //  |/
663  //  F1
664  //  |\
665  //  | N
666  //  |/
667  //  F2
668  //  |\
669  //  | P2
670  //  |/
671  //  F3
672  //  |\
673  //  B
674  //
675  // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
676  // of B before structurization. F1/F2/F3 are flow blocks inserted during
677  // structurization process. Block N is not a predecessor of B before
678  // structurization, but are placed between the predecessors(P1/P2) of B after
679  // structurization. This usually means that threads went to N never take the
680  // path N->F2->F3->B. For example, the threads take the branch F1->N may
681  // always take the branch F2->P2. So, when we are reconstructing a PHI
682  // originally in B, we can safely say the incoming value from N is undefined.
683  SmallSet<BasicBlock *, 8> VisitedBlock;
684  SmallVector<BasicBlock *, 8> Stack;
685  if (PHIBlock == ParentRegion->getExit()) {
686    for (auto P : predecessors(PHIBlock)) {
687      if (ParentRegion->contains(P))
688        Stack.push_back(P);
689    }
690  } else {
691    append_range(Stack, predecessors(PHIBlock));
692  }
693
694  // Do a backward traversal over the CFG, and stop further searching if
695  // the block is not a Flow. If a block is neither flow block nor the
696  // incoming predecessor, then the incoming value from the block is
697  // undefined value for the PHI being reconstructed.
698  while (!Stack.empty()) {
699    BasicBlock *Current = Stack.pop_back_val();
700    if (VisitedBlock.contains(Current))
701      continue;
702
703    VisitedBlock.insert(Current);
704    if (FlowSet.contains(Current)) {
705      for (auto P : predecessors(Current))
706        Stack.push_back(P);
707    } else if (!Incomings.contains(Current)) {
708      UndefBlks.push_back(Current);
709    }
710  }
711}
712
713/// Add the real PHI value as soon as everything is set up
714void StructurizeCFG::setPhiValues() {
715  SmallVector<PHINode *, 8> InsertedPhis;
716  SSAUpdater Updater(&InsertedPhis);
717  for (const auto &AddedPhi : AddedPhis) {
718    BasicBlock *To = AddedPhi.first;
719    const BBVector &From = AddedPhi.second;
720
721    if (!DeletedPhis.count(To))
722      continue;
723
724    SmallVector<BasicBlock *> UndefBlks;
725    bool CachedUndefs = false;
726    PhiMap &Map = DeletedPhis[To];
727    for (const auto &PI : Map) {
728      PHINode *Phi = PI.first;
729      Value *Undef = UndefValue::get(Phi->getType());
730      Updater.Initialize(Phi->getType(), "");
731      Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
732      Updater.AddAvailableValue(To, Undef);
733
734      SmallSet<BasicBlock *, 8> Incomings;
735      SmallVector<BasicBlock *> ConstantPreds;
736      for (const auto &VI : PI.second) {
737        Incomings.insert(VI.first);
738        Updater.AddAvailableValue(VI.first, VI.second);
739        if (isa<Constant>(VI.second))
740          ConstantPreds.push_back(VI.first);
741      }
742
743      if (!CachedUndefs) {
744        findUndefBlocks(To, Incomings, UndefBlks);
745        CachedUndefs = true;
746      }
747
748      for (auto UB : UndefBlks) {
749        // If this undef block is dominated by any predecessor(before
750        // structurization) of reconstructed PHI with constant incoming value,
751        // don't mark the available value as undefined. Setting undef to such
752        // block will stop us from getting optimal phi insertion.
753        if (any_of(ConstantPreds,
754                   [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
755          continue;
756        Updater.AddAvailableValue(UB, Undef);
757      }
758
759      for (BasicBlock *FI : From)
760        Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
761      AffectedPhis.push_back(Phi);
762    }
763
764    DeletedPhis.erase(To);
765  }
766  assert(DeletedPhis.empty());
767
768  AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
769}
770
771void StructurizeCFG::simplifyAffectedPhis() {
772  bool Changed;
773  do {
774    Changed = false;
775    SimplifyQuery Q(Func->getParent()->getDataLayout());
776    Q.DT = DT;
777    // Setting CanUseUndef to true might extend value liveness, set it to false
778    // to achieve better register pressure.
779    Q.CanUseUndef = false;
780    for (WeakVH VH : AffectedPhis) {
781      if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
782        if (auto NewValue = simplifyInstruction(Phi, Q)) {
783          Phi->replaceAllUsesWith(NewValue);
784          Phi->eraseFromParent();
785          Changed = true;
786        }
787      }
788    }
789  } while (Changed);
790}
791
792/// Remove phi values from all successors and then remove the terminator.
793void StructurizeCFG::killTerminator(BasicBlock *BB) {
794  Instruction *Term = BB->getTerminator();
795  if (!Term)
796    return;
797
798  for (BasicBlock *Succ : successors(BB))
799    delPhiValues(BB, Succ);
800
801  Term->eraseFromParent();
802}
803
804/// Let node exit(s) point to NewExit
805void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
806                                bool IncludeDominator) {
807  if (Node->isSubRegion()) {
808    Region *SubRegion = Node->getNodeAs<Region>();
809    BasicBlock *OldExit = SubRegion->getExit();
810    BasicBlock *Dominator = nullptr;
811
812    // Find all the edges from the sub region to the exit.
813    // We use make_early_inc_range here because we modify BB's terminator.
814    for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
815      if (!SubRegion->contains(BB))
816        continue;
817
818      // Modify the edges to point to the new exit
819      delPhiValues(BB, OldExit);
820      BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
821      addPhiValues(BB, NewExit);
822
823      // Find the new dominator (if requested)
824      if (IncludeDominator) {
825        if (!Dominator)
826          Dominator = BB;
827        else
828          Dominator = DT->findNearestCommonDominator(Dominator, BB);
829      }
830    }
831
832    // Change the dominator (if requested)
833    if (Dominator)
834      DT->changeImmediateDominator(NewExit, Dominator);
835
836    // Update the region info
837    SubRegion->replaceExit(NewExit);
838  } else {
839    BasicBlock *BB = Node->getNodeAs<BasicBlock>();
840    killTerminator(BB);
841    BranchInst *Br = BranchInst::Create(NewExit, BB);
842    Br->setDebugLoc(TermDL[BB]);
843    addPhiValues(BB, NewExit);
844    if (IncludeDominator)
845      DT->changeImmediateDominator(NewExit, BB);
846  }
847}
848
849/// Create a new flow node and update dominator tree and region info
850BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
851  LLVMContext &Context = Func->getContext();
852  BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
853                       Order.back()->getEntry();
854  BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
855                                        Func, Insert);
856  FlowSet.insert(Flow);
857
858  // use a temporary variable to avoid a use-after-free if the map's storage is
859  // reallocated
860  DebugLoc DL = TermDL[Dominator];
861  TermDL[Flow] = std::move(DL);
862
863  DT->addNewBlock(Flow, Dominator);
864  ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
865  return Flow;
866}
867
868/// Create a new or reuse the previous node as flow node
869BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
870  BasicBlock *Entry = PrevNode->getEntry();
871
872  if (!PrevNode->isSubRegion()) {
873    killTerminator(Entry);
874    if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
875      return Entry;
876  }
877
878  // create a new flow node
879  BasicBlock *Flow = getNextFlow(Entry);
880
881  // and wire it up
882  changeExit(PrevNode, Flow, true);
883  PrevNode = ParentRegion->getBBNode(Flow);
884  return Flow;
885}
886
887/// Returns the region exit if possible, otherwise just a new flow node
888BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
889                                        bool ExitUseAllowed) {
890  if (!Order.empty() || !ExitUseAllowed)
891    return getNextFlow(Flow);
892
893  BasicBlock *Exit = ParentRegion->getExit();
894  DT->changeImmediateDominator(Exit, Flow);
895  addPhiValues(Flow, Exit);
896  return Exit;
897}
898
899/// Set the previous node
900void StructurizeCFG::setPrevNode(BasicBlock *BB) {
901  PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
902                                        : nullptr;
903}
904
905/// Does BB dominate all the predicates of Node?
906bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
907  BBPredicates &Preds = Predicates[Node->getEntry()];
908  return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
909    return DT->dominates(BB, Pred.first);
910  });
911}
912
913/// Can we predict that this node will always be called?
914bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
915  BBPredicates &Preds = Predicates[Node->getEntry()];
916  bool Dominated = false;
917
918  // Regionentry is always true
919  if (!PrevNode)
920    return true;
921
922  for (std::pair<BasicBlock*, Value*> Pred : Preds) {
923    BasicBlock *BB = Pred.first;
924    Value *V = Pred.second;
925
926    if (V != BoolTrue)
927      return false;
928
929    if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
930      Dominated = true;
931  }
932
933  // TODO: The dominator check is too strict
934  return Dominated;
935}
936
937/// Take one node from the order vector and wire it up
938void StructurizeCFG::wireFlow(bool ExitUseAllowed,
939                              BasicBlock *LoopEnd) {
940  RegionNode *Node = Order.pop_back_val();
941  Visited.insert(Node->getEntry());
942
943  if (isPredictableTrue(Node)) {
944    // Just a linear flow
945    if (PrevNode) {
946      changeExit(PrevNode, Node->getEntry(), true);
947    }
948    PrevNode = Node;
949  } else {
950    // Insert extra prefix node (or reuse last one)
951    BasicBlock *Flow = needPrefix(false);
952
953    // Insert extra postfix node (or use exit instead)
954    BasicBlock *Entry = Node->getEntry();
955    BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
956
957    // let it point to entry and next block
958    BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
959    Br->setDebugLoc(TermDL[Flow]);
960    Conditions.push_back(Br);
961    addPhiValues(Flow, Entry);
962    DT->changeImmediateDominator(Entry, Flow);
963
964    PrevNode = Node;
965    while (!Order.empty() && !Visited.count(LoopEnd) &&
966           dominatesPredicates(Entry, Order.back())) {
967      handleLoops(false, LoopEnd);
968    }
969
970    changeExit(PrevNode, Next, false);
971    setPrevNode(Next);
972  }
973}
974
975void StructurizeCFG::handleLoops(bool ExitUseAllowed,
976                                 BasicBlock *LoopEnd) {
977  RegionNode *Node = Order.back();
978  BasicBlock *LoopStart = Node->getEntry();
979
980  if (!Loops.count(LoopStart)) {
981    wireFlow(ExitUseAllowed, LoopEnd);
982    return;
983  }
984
985  if (!isPredictableTrue(Node))
986    LoopStart = needPrefix(true);
987
988  LoopEnd = Loops[Node->getEntry()];
989  wireFlow(false, LoopEnd);
990  while (!Visited.count(LoopEnd)) {
991    handleLoops(false, LoopEnd);
992  }
993
994  assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
995
996  // Create an extra loop end node
997  LoopEnd = needPrefix(false);
998  BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
999  BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1000  Br->setDebugLoc(TermDL[LoopEnd]);
1001  LoopConds.push_back(Br);
1002  addPhiValues(LoopEnd, LoopStart);
1003  setPrevNode(Next);
1004}
1005
1006/// After this function control flow looks like it should be, but
1007/// branches and PHI nodes only have undefined conditions.
1008void StructurizeCFG::createFlow() {
1009  BasicBlock *Exit = ParentRegion->getExit();
1010  bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1011
1012  AffectedPhis.clear();
1013  DeletedPhis.clear();
1014  AddedPhis.clear();
1015  Conditions.clear();
1016  LoopConds.clear();
1017
1018  PrevNode = nullptr;
1019  Visited.clear();
1020
1021  while (!Order.empty()) {
1022    handleLoops(EntryDominatesExit, nullptr);
1023  }
1024
1025  if (PrevNode)
1026    changeExit(PrevNode, Exit, EntryDominatesExit);
1027  else
1028    assert(EntryDominatesExit);
1029}
1030
1031/// Handle a rare case where the disintegrated nodes instructions
1032/// no longer dominate all their uses. Not sure if this is really necessary
1033void StructurizeCFG::rebuildSSA() {
1034  SSAUpdater Updater;
1035  for (BasicBlock *BB : ParentRegion->blocks())
1036    for (Instruction &I : *BB) {
1037      bool Initialized = false;
1038      // We may modify the use list as we iterate over it, so we use
1039      // make_early_inc_range.
1040      for (Use &U : llvm::make_early_inc_range(I.uses())) {
1041        Instruction *User = cast<Instruction>(U.getUser());
1042        if (User->getParent() == BB) {
1043          continue;
1044        } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1045          if (UserPN->getIncomingBlock(U) == BB)
1046            continue;
1047        }
1048
1049        if (DT->dominates(&I, User))
1050          continue;
1051
1052        if (!Initialized) {
1053          Value *Undef = UndefValue::get(I.getType());
1054          Updater.Initialize(I.getType(), "");
1055          Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1056          Updater.AddAvailableValue(BB, &I);
1057          Initialized = true;
1058        }
1059        Updater.RewriteUseAfterInsertions(U);
1060      }
1061    }
1062}
1063
1064static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1065                                   const UniformityInfo &UA) {
1066  // Bool for if all sub-regions are uniform.
1067  bool SubRegionsAreUniform = true;
1068  // Count of how many direct children are conditional.
1069  unsigned ConditionalDirectChildren = 0;
1070
1071  for (auto *E : R->elements()) {
1072    if (!E->isSubRegion()) {
1073      auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1074      if (!Br || !Br->isConditional())
1075        continue;
1076
1077      if (!UA.isUniform(Br))
1078        return false;
1079
1080      // One of our direct children is conditional.
1081      ConditionalDirectChildren++;
1082
1083      LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1084                        << " has uniform terminator\n");
1085    } else {
1086      // Explicitly refuse to treat regions as uniform if they have non-uniform
1087      // subregions. We cannot rely on UniformityAnalysis for branches in
1088      // subregions because those branches may have been removed and re-created,
1089      // so we look for our metadata instead.
1090      //
1091      // Warning: It would be nice to treat regions as uniform based only on
1092      // their direct child basic blocks' terminators, regardless of whether
1093      // subregions are uniform or not. However, this requires a very careful
1094      // look at SIAnnotateControlFlow to make sure nothing breaks there.
1095      for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1096        auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1097        if (!Br || !Br->isConditional())
1098          continue;
1099
1100        if (!Br->getMetadata(UniformMDKindID)) {
1101          // Early exit if we cannot have relaxed uniform regions.
1102          if (!RelaxedUniformRegions)
1103            return false;
1104
1105          SubRegionsAreUniform = false;
1106          break;
1107        }
1108      }
1109    }
1110  }
1111
1112  // Our region is uniform if:
1113  // 1. All conditional branches that are direct children are uniform (checked
1114  // above).
1115  // 2. And either:
1116  //   a. All sub-regions are uniform.
1117  //   b. There is one or less conditional branches among the direct children.
1118  return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1119}
1120
1121void StructurizeCFG::init(Region *R) {
1122  LLVMContext &Context = R->getEntry()->getContext();
1123
1124  Boolean = Type::getInt1Ty(Context);
1125  BoolTrue = ConstantInt::getTrue(Context);
1126  BoolFalse = ConstantInt::getFalse(Context);
1127  BoolPoison = PoisonValue::get(Boolean);
1128
1129  this->UA = nullptr;
1130}
1131
1132bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1133  if (R->isTopLevelRegion())
1134    return false;
1135
1136  this->UA = &UA;
1137
1138  // TODO: We could probably be smarter here with how we handle sub-regions.
1139  // We currently rely on the fact that metadata is set by earlier invocations
1140  // of the pass on sub-regions, and that this metadata doesn't get lost --
1141  // but we shouldn't rely on metadata for correctness!
1142  unsigned UniformMDKindID =
1143      R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1144
1145  if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1146    LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1147                      << '\n');
1148
1149    // Mark all direct child block terminators as having been treated as
1150    // uniform. To account for a possible future in which non-uniform
1151    // sub-regions are treated more cleverly, indirect children are not
1152    // marked as uniform.
1153    MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1154    for (RegionNode *E : R->elements()) {
1155      if (E->isSubRegion())
1156        continue;
1157
1158      if (Instruction *Term = E->getEntry()->getTerminator())
1159        Term->setMetadata(UniformMDKindID, MD);
1160    }
1161
1162    return true;
1163  }
1164  return false;
1165}
1166
1167/// Run the transformation for each region found
1168bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1169  if (R->isTopLevelRegion())
1170    return false;
1171
1172  this->DT = DT;
1173
1174  Func = R->getEntry()->getParent();
1175  assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1176
1177  ParentRegion = R;
1178
1179  orderNodes();
1180  collectInfos();
1181  createFlow();
1182  insertConditions(false);
1183  insertConditions(true);
1184  setPhiValues();
1185  simplifyConditions();
1186  simplifyAffectedPhis();
1187  rebuildSSA();
1188
1189  // Cleanup
1190  Order.clear();
1191  Visited.clear();
1192  DeletedPhis.clear();
1193  AddedPhis.clear();
1194  Predicates.clear();
1195  Conditions.clear();
1196  Loops.clear();
1197  LoopPreds.clear();
1198  LoopConds.clear();
1199  FlowSet.clear();
1200  TermDL.clear();
1201
1202  return true;
1203}
1204
1205Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1206  return new StructurizeCFGLegacyPass(SkipUniformRegions);
1207}
1208
1209static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1210  Regions.push_back(&R);
1211  for (const auto &E : R)
1212    addRegionIntoQueue(*E, Regions);
1213}
1214
1215PreservedAnalyses StructurizeCFGPass::run(Function &F,
1216                                          FunctionAnalysisManager &AM) {
1217
1218  bool Changed = false;
1219  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1220  auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1221  std::vector<Region *> Regions;
1222  addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1223  while (!Regions.empty()) {
1224    Region *R = Regions.back();
1225    StructurizeCFG SCFG;
1226    SCFG.init(R);
1227    Changed |= SCFG.run(R, DT);
1228    Regions.pop_back();
1229  }
1230  if (!Changed)
1231    return PreservedAnalyses::all();
1232  PreservedAnalyses PA;
1233  PA.preserve<DominatorTreeAnalysis>();
1234  return PA;
1235}
1236