MustExecute.h revision 360784
1//===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Contains a collection of routines for determining if a given instruction is
10/// guaranteed to execute if a given point in control flow is reached. The most
11/// common example is an instruction within a loop being provably executed if we
12/// branch to the header of it's containing loop.
13///
14/// There are two interfaces available to determine if an instruction is
15/// executed once a given point in the control flow is reached:
16/// 1) A loop-centric one derived from LoopSafetyInfo.
17/// 2) A "must be executed context"-based one implemented in the
18///    MustBeExecutedContextExplorer.
19/// Please refer to the class comments for more information.
20///
21//===----------------------------------------------------------------------===//
22
23#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
24#define LLVM_ANALYSIS_MUSTEXECUTE_H
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/Analysis/EHPersonalities.h"
28#include "llvm/Analysis/InstructionPrecedenceTracking.h"
29#include "llvm/Analysis/LoopInfo.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Instruction.h"
33
34namespace llvm {
35
36namespace {
37template <typename T> using GetterTy = std::function<T *(const Function &F)>;
38}
39
40class Instruction;
41class DominatorTree;
42class PostDominatorTree;
43class Loop;
44
45/// Captures loop safety information.
46/// It keep information for loop blocks may throw exception or otherwise
47/// exit abnormaly on any iteration of the loop which might actually execute
48/// at runtime.  The primary way to consume this infromation is via
49/// isGuaranteedToExecute below, but some callers bailout or fallback to
50/// alternate reasoning if a loop contains any implicit control flow.
51/// NOTE: LoopSafetyInfo contains cached information regarding loops and their
52/// particular blocks. This information is only dropped on invocation of
53/// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if
54/// any thrower instructions have been added or removed from them, or if the
55/// control flow has changed, or in case of other meaningful modifications, the
56/// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the
57/// loop were made and the info wasn't recomputed properly, the behavior of all
58/// methods except for computeLoopSafetyInfo is undefined.
59class LoopSafetyInfo {
60  // Used to update funclet bundle operands.
61  DenseMap<BasicBlock *, ColorVector> BlockColors;
62
63protected:
64  /// Computes block colors.
65  void computeBlockColors(const Loop *CurLoop);
66
67public:
68  /// Returns block colors map that is used to update funclet operand bundles.
69  const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
70
71  /// Copy colors of block \p Old into the block \p New.
72  void copyColors(BasicBlock *New, BasicBlock *Old);
73
74  /// Returns true iff the block \p BB potentially may throw exception. It can
75  /// be false-positive in cases when we want to avoid complex analysis.
76  virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
77
78  /// Returns true iff any block of the loop for which this info is contains an
79  /// instruction that may throw or otherwise exit abnormally.
80  virtual bool anyBlockMayThrow() const = 0;
81
82  /// Return true if we must reach the block \p BB under assumption that the
83  /// loop \p CurLoop is entered.
84  bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
85                               const DominatorTree *DT) const;
86
87  /// Computes safety information for a loop checks loop body & header for
88  /// the possibility of may throw exception, it takes LoopSafetyInfo and loop
89  /// as argument. Updates safety information in LoopSafetyInfo argument.
90  /// Note: This is defined to clear and reinitialize an already initialized
91  /// LoopSafetyInfo.  Some callers rely on this fact.
92  virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
93
94  /// Returns true if the instruction in a loop is guaranteed to execute at
95  /// least once (under the assumption that the loop is entered).
96  virtual bool isGuaranteedToExecute(const Instruction &Inst,
97                                     const DominatorTree *DT,
98                                     const Loop *CurLoop) const = 0;
99
100  LoopSafetyInfo() = default;
101
102  virtual ~LoopSafetyInfo() = default;
103};
104
105
106/// Simple and conservative implementation of LoopSafetyInfo that can give
107/// false-positive answers to its queries in order to avoid complicated
108/// analysis.
109class SimpleLoopSafetyInfo: public LoopSafetyInfo {
110  bool MayThrow = false;       // The current loop contains an instruction which
111                               // may throw.
112  bool HeaderMayThrow = false; // Same as previous, but specific to loop header
113
114public:
115  virtual bool blockMayThrow(const BasicBlock *BB) const;
116
117  virtual bool anyBlockMayThrow() const;
118
119  virtual void computeLoopSafetyInfo(const Loop *CurLoop);
120
121  virtual bool isGuaranteedToExecute(const Instruction &Inst,
122                                     const DominatorTree *DT,
123                                     const Loop *CurLoop) const;
124
125  SimpleLoopSafetyInfo() : LoopSafetyInfo() {};
126
127  virtual ~SimpleLoopSafetyInfo() {};
128};
129
130/// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to
131/// give precise answers on "may throw" queries. This implementation uses cache
132/// that should be invalidated by calling the methods insertInstructionTo and
133/// removeInstruction whenever we modify a basic block's contents by adding or
134/// removing instructions.
135class ICFLoopSafetyInfo: public LoopSafetyInfo {
136  bool MayThrow = false;       // The current loop contains an instruction which
137                               // may throw.
138  // Contains information about implicit control flow in this loop's blocks.
139  mutable ImplicitControlFlowTracking ICF;
140  // Contains information about instruction that may possibly write memory.
141  mutable MemoryWriteTracking MW;
142
143public:
144  virtual bool blockMayThrow(const BasicBlock *BB) const;
145
146  virtual bool anyBlockMayThrow() const;
147
148  virtual void computeLoopSafetyInfo(const Loop *CurLoop);
149
150  virtual bool isGuaranteedToExecute(const Instruction &Inst,
151                                     const DominatorTree *DT,
152                                     const Loop *CurLoop) const;
153
154  /// Returns true if we could not execute a memory-modifying instruction before
155  /// we enter \p BB under assumption that \p CurLoop is entered.
156  bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
157      const;
158
159  /// Returns true if we could not execute a memory-modifying instruction before
160  /// we execute \p I under assumption that \p CurLoop is entered.
161  bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
162      const;
163
164  /// Inform the safety info that we are planning to insert a new instruction
165  /// \p Inst into the basic block \p BB. It will make all cache updates to keep
166  /// it correct after this insertion.
167  void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
168
169  /// Inform safety info that we are planning to remove the instruction \p Inst
170  /// from its block. It will make all cache updates to keep it correct after
171  /// this removal.
172  void removeInstruction(const Instruction *Inst);
173
174  ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {};
175
176  virtual ~ICFLoopSafetyInfo() {};
177};
178
179struct MustBeExecutedContextExplorer;
180
181/// Must be executed iterators visit stretches of instructions that are
182/// guaranteed to be executed together, potentially with other instruction
183/// executed in-between.
184///
185/// Given the following code, and assuming all statements are single
186/// instructions which transfer execution to the successor (see
187/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
188/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
189/// and E. If we start at C or D, we will visit all instructions A-E.
190///
191/// \code
192///   A;
193///   B;
194///   if (...) {
195///     C;
196///     D;
197///   }
198///   E;
199/// \endcode
200///
201///
202/// Below is the example extneded with instructions F and G. Now we assume F
203/// might not transfer execution to it's successor G. As a result we get the
204/// following visit sets:
205///
206/// Start Instruction   | Visit Set
207/// A                   | A, B,       E, F
208///    B                | A, B,       E, F
209///       C             | A, B, C, D, E, F
210///          D          | A, B, C, D, E, F
211///             E       | A, B,       E, F
212///                F    | A, B,       E, F
213///                   G | A, B,       E, F, G
214///
215///
216/// \code
217///   A;
218///   B;
219///   if (...) {
220///     C;
221///     D;
222///   }
223///   E;
224///   F;  // Might not transfer execution to its successor G.
225///   G;
226/// \endcode
227///
228///
229/// A more complex example involving conditionals, loops, break, and continue
230/// is shown below. We again assume all instructions will transmit control to
231/// the successor and we assume we can prove the inner loop to be finite. We
232/// omit non-trivial branch conditions as the exploration is oblivious to them.
233/// Constant branches are assumed to be unconditional in the CFG. The resulting
234/// visist sets are shown in the table below.
235///
236/// \code
237///   A;
238///   while (true) {
239///     B;
240///     if (...)
241///       C;
242///     if (...)
243///       continue;
244///     D;
245///     if (...)
246///       break;
247///     do {
248///       if (...)
249///         continue;
250///       E;
251///     } while (...);
252///     F;
253///   }
254///   G;
255/// \endcode
256///
257/// Start Instruction    | Visit Set
258/// A                    | A, B
259///    B                 | A, B
260///       C              | A, B, C
261///          D           | A, B,    D
262///             E        | A, B,    D, E, F
263///                F     | A, B,    D,    F
264///                   G  | A, B,    D,       G
265///
266///
267/// Note that the examples show optimal visist sets but not necessarily the ones
268/// derived by the explorer depending on the available CFG analyses (see
269/// MustBeExecutedContextExplorer). Also note that we, depending on the options,
270/// the visit set can contain instructions from other functions.
271struct MustBeExecutedIterator {
272  /// Type declarations that make his class an input iterator.
273  ///{
274  typedef const Instruction *value_type;
275  typedef std::ptrdiff_t difference_type;
276  typedef const Instruction **pointer;
277  typedef const Instruction *&reference;
278  typedef std::input_iterator_tag iterator_category;
279  ///}
280
281  using ExplorerTy = MustBeExecutedContextExplorer;
282
283  MustBeExecutedIterator(const MustBeExecutedIterator &Other)
284      : Visited(Other.Visited), Explorer(Other.Explorer),
285        CurInst(Other.CurInst) {}
286
287  MustBeExecutedIterator(MustBeExecutedIterator &&Other)
288      : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
289        CurInst(Other.CurInst) {}
290
291  MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
292    if (this != &Other) {
293      std::swap(Visited, Other.Visited);
294      std::swap(CurInst, Other.CurInst);
295    }
296    return *this;
297  }
298
299  ~MustBeExecutedIterator() {}
300
301  /// Pre- and post-increment operators.
302  ///{
303  MustBeExecutedIterator &operator++() {
304    CurInst = advance();
305    return *this;
306  }
307
308  MustBeExecutedIterator operator++(int) {
309    MustBeExecutedIterator tmp(*this);
310    operator++();
311    return tmp;
312  }
313  ///}
314
315  /// Equality and inequality operators. Note that we ignore the history here.
316  ///{
317  bool operator==(const MustBeExecutedIterator &Other) const {
318    return CurInst == Other.CurInst;
319  }
320
321  bool operator!=(const MustBeExecutedIterator &Other) const {
322    return !(*this == Other);
323  }
324  ///}
325
326  /// Return the underlying instruction.
327  const Instruction *&operator*() { return CurInst; }
328  const Instruction *getCurrentInst() const { return CurInst; }
329
330  /// Return true if \p I was encountered by this iterator already.
331  bool count(const Instruction *I) const { return Visited.count(I); }
332
333private:
334  using VisitedSetTy = DenseSet<const Instruction *>;
335
336  /// Private constructors.
337  MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
338
339  /// Reset the iterator to its initial state pointing at \p I.
340  void reset(const Instruction *I);
341
342  /// Try to advance one of the underlying positions (Head or Tail).
343  ///
344  /// \return The next instruction in the must be executed context, or nullptr
345  ///         if none was found.
346  const Instruction *advance();
347
348  /// A set to track the visited instructions in order to deal with endless
349  /// loops and recursion.
350  VisitedSetTy Visited;
351
352  /// A reference to the explorer that created this iterator.
353  ExplorerTy &Explorer;
354
355  /// The instruction we are currently exposing to the user. There is always an
356  /// instruction that we know is executed with the given program point,
357  /// initially the program point itself.
358  const Instruction *CurInst;
359
360  friend struct MustBeExecutedContextExplorer;
361};
362
363/// A "must be executed context" for a given program point PP is the set of
364/// instructions, potentially before and after PP, that are executed always when
365/// PP is reached. The MustBeExecutedContextExplorer an interface to explore
366/// "must be executed contexts" in a module through the use of
367/// MustBeExecutedIterator.
368///
369/// The explorer exposes "must be executed iterators" that traverse the must be
370/// executed context. There is little information sharing between iterators as
371/// the expected use case involves few iterators for "far apart" instructions.
372/// If that changes, we should consider caching more intermediate results.
373struct MustBeExecutedContextExplorer {
374
375  /// In the description of the parameters we use PP to denote a program point
376  /// for which the must be executed context is explored, or put differently,
377  /// for which the MustBeExecutedIterator is created.
378  ///
379  /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
380  ///                             other than the parent of PP should be
381  ///                             explored.
382  MustBeExecutedContextExplorer(
383      bool ExploreInterBlock,
384      GetterTy<const LoopInfo> LIGetter =
385          [](const Function &) { return nullptr; },
386      GetterTy<const PostDominatorTree> PDTGetter =
387          [](const Function &) { return nullptr; })
388      : ExploreInterBlock(ExploreInterBlock), LIGetter(LIGetter),
389        PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
390
391  /// Clean up the dynamically allocated iterators.
392  ~MustBeExecutedContextExplorer() {
393    DeleteContainerSeconds(InstructionIteratorMap);
394  }
395
396  /// Iterator-based interface. \see MustBeExecutedIterator.
397  ///{
398  using iterator = MustBeExecutedIterator;
399  using const_iterator = const MustBeExecutedIterator;
400
401  /// Return an iterator to explore the context around \p PP.
402  iterator &begin(const Instruction *PP) {
403    auto *&It = InstructionIteratorMap[PP];
404    if (!It)
405      It = new iterator(*this, PP);
406    return *It;
407  }
408
409  /// Return an iterator to explore the cached context around \p PP.
410  const_iterator &begin(const Instruction *PP) const {
411    return *InstructionIteratorMap.lookup(PP);
412  }
413
414  /// Return an universal end iterator.
415  ///{
416  iterator &end() { return EndIterator; }
417  iterator &end(const Instruction *) { return EndIterator; }
418
419  const_iterator &end() const { return EndIterator; }
420  const_iterator &end(const Instruction *) const { return EndIterator; }
421  ///}
422
423  /// Return an iterator range to explore the context around \p PP.
424  llvm::iterator_range<iterator> range(const Instruction *PP) {
425    return llvm::make_range(begin(PP), end(PP));
426  }
427
428  /// Return an iterator range to explore the cached context around \p PP.
429  llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
430    return llvm::make_range(begin(PP), end(PP));
431  }
432  ///}
433
434  /// Helper to look for \p I in the context of \p PP.
435  ///
436  /// The context is expanded until \p I was found or no more expansion is
437  /// possible.
438  ///
439  /// \returns True, iff \p I was found.
440  bool findInContextOf(const Instruction *I, const Instruction *PP) {
441    auto EIt = begin(PP), EEnd = end(PP);
442    return findInContextOf(I, EIt, EEnd);
443  }
444
445  /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
446  ///
447  /// The context is expanded until \p I was found or no more expansion is
448  /// possible.
449  ///
450  /// \returns True, iff \p I was found.
451  bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
452    bool Found = EIt.count(I);
453    while (!Found && EIt != EEnd)
454      Found = (++EIt).getCurrentInst() == I;
455    return Found;
456  }
457
458  /// Return the next instruction that is guaranteed to be executed after \p PP.
459  ///
460  /// \param It              The iterator that is used to traverse the must be
461  ///                        executed context.
462  /// \param PP              The program point for which the next instruction
463  ///                        that is guaranteed to execute is determined.
464  const Instruction *
465  getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
466                                   const Instruction *PP);
467
468  /// Find the next join point from \p InitBB in forward direction.
469  const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
470
471  /// Parameter that limit the performed exploration. See the constructor for
472  /// their meaning.
473  ///{
474  const bool ExploreInterBlock;
475  ///}
476
477private:
478  /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
479  /// PostDominatorTree.
480  ///{
481  GetterTy<const LoopInfo> LIGetter;
482  GetterTy<const PostDominatorTree> PDTGetter;
483  ///}
484
485  /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
486  DenseMap<const BasicBlock *, Optional<bool>> BlockTransferMap;
487
488  /// Map to cache containsIrreducibleCFG results.
489  DenseMap<const Function*, Optional<bool>> IrreducibleControlMap;
490
491  /// Map from instructions to associated must be executed iterators.
492  DenseMap<const Instruction *, MustBeExecutedIterator *>
493      InstructionIteratorMap;
494
495  /// A unique end iterator.
496  MustBeExecutedIterator EndIterator;
497};
498
499} // namespace llvm
500
501#endif
502