MustExecute.h revision 360784
1//===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// \file 9/// Contains a collection of routines for determining if a given instruction is 10/// guaranteed to execute if a given point in control flow is reached. The most 11/// common example is an instruction within a loop being provably executed if we 12/// branch to the header of it's containing loop. 13/// 14/// There are two interfaces available to determine if an instruction is 15/// executed once a given point in the control flow is reached: 16/// 1) A loop-centric one derived from LoopSafetyInfo. 17/// 2) A "must be executed context"-based one implemented in the 18/// MustBeExecutedContextExplorer. 19/// Please refer to the class comments for more information. 20/// 21//===----------------------------------------------------------------------===// 22 23#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H 24#define LLVM_ANALYSIS_MUSTEXECUTE_H 25 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/Analysis/EHPersonalities.h" 28#include "llvm/Analysis/InstructionPrecedenceTracking.h" 29#include "llvm/Analysis/LoopInfo.h" 30#include "llvm/IR/BasicBlock.h" 31#include "llvm/IR/Dominators.h" 32#include "llvm/IR/Instruction.h" 33 34namespace llvm { 35 36namespace { 37template <typename T> using GetterTy = std::function<T *(const Function &F)>; 38} 39 40class Instruction; 41class DominatorTree; 42class PostDominatorTree; 43class Loop; 44 45/// Captures loop safety information. 46/// It keep information for loop blocks may throw exception or otherwise 47/// exit abnormaly on any iteration of the loop which might actually execute 48/// at runtime. The primary way to consume this infromation is via 49/// isGuaranteedToExecute below, but some callers bailout or fallback to 50/// alternate reasoning if a loop contains any implicit control flow. 51/// NOTE: LoopSafetyInfo contains cached information regarding loops and their 52/// particular blocks. This information is only dropped on invocation of 53/// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if 54/// any thrower instructions have been added or removed from them, or if the 55/// control flow has changed, or in case of other meaningful modifications, the 56/// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the 57/// loop were made and the info wasn't recomputed properly, the behavior of all 58/// methods except for computeLoopSafetyInfo is undefined. 59class LoopSafetyInfo { 60 // Used to update funclet bundle operands. 61 DenseMap<BasicBlock *, ColorVector> BlockColors; 62 63protected: 64 /// Computes block colors. 65 void computeBlockColors(const Loop *CurLoop); 66 67public: 68 /// Returns block colors map that is used to update funclet operand bundles. 69 const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const; 70 71 /// Copy colors of block \p Old into the block \p New. 72 void copyColors(BasicBlock *New, BasicBlock *Old); 73 74 /// Returns true iff the block \p BB potentially may throw exception. It can 75 /// be false-positive in cases when we want to avoid complex analysis. 76 virtual bool blockMayThrow(const BasicBlock *BB) const = 0; 77 78 /// Returns true iff any block of the loop for which this info is contains an 79 /// instruction that may throw or otherwise exit abnormally. 80 virtual bool anyBlockMayThrow() const = 0; 81 82 /// Return true if we must reach the block \p BB under assumption that the 83 /// loop \p CurLoop is entered. 84 bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB, 85 const DominatorTree *DT) const; 86 87 /// Computes safety information for a loop checks loop body & header for 88 /// the possibility of may throw exception, it takes LoopSafetyInfo and loop 89 /// as argument. Updates safety information in LoopSafetyInfo argument. 90 /// Note: This is defined to clear and reinitialize an already initialized 91 /// LoopSafetyInfo. Some callers rely on this fact. 92 virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0; 93 94 /// Returns true if the instruction in a loop is guaranteed to execute at 95 /// least once (under the assumption that the loop is entered). 96 virtual bool isGuaranteedToExecute(const Instruction &Inst, 97 const DominatorTree *DT, 98 const Loop *CurLoop) const = 0; 99 100 LoopSafetyInfo() = default; 101 102 virtual ~LoopSafetyInfo() = default; 103}; 104 105 106/// Simple and conservative implementation of LoopSafetyInfo that can give 107/// false-positive answers to its queries in order to avoid complicated 108/// analysis. 109class SimpleLoopSafetyInfo: public LoopSafetyInfo { 110 bool MayThrow = false; // The current loop contains an instruction which 111 // may throw. 112 bool HeaderMayThrow = false; // Same as previous, but specific to loop header 113 114public: 115 virtual bool blockMayThrow(const BasicBlock *BB) const; 116 117 virtual bool anyBlockMayThrow() const; 118 119 virtual void computeLoopSafetyInfo(const Loop *CurLoop); 120 121 virtual bool isGuaranteedToExecute(const Instruction &Inst, 122 const DominatorTree *DT, 123 const Loop *CurLoop) const; 124 125 SimpleLoopSafetyInfo() : LoopSafetyInfo() {}; 126 127 virtual ~SimpleLoopSafetyInfo() {}; 128}; 129 130/// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to 131/// give precise answers on "may throw" queries. This implementation uses cache 132/// that should be invalidated by calling the methods insertInstructionTo and 133/// removeInstruction whenever we modify a basic block's contents by adding or 134/// removing instructions. 135class ICFLoopSafetyInfo: public LoopSafetyInfo { 136 bool MayThrow = false; // The current loop contains an instruction which 137 // may throw. 138 // Contains information about implicit control flow in this loop's blocks. 139 mutable ImplicitControlFlowTracking ICF; 140 // Contains information about instruction that may possibly write memory. 141 mutable MemoryWriteTracking MW; 142 143public: 144 virtual bool blockMayThrow(const BasicBlock *BB) const; 145 146 virtual bool anyBlockMayThrow() const; 147 148 virtual void computeLoopSafetyInfo(const Loop *CurLoop); 149 150 virtual bool isGuaranteedToExecute(const Instruction &Inst, 151 const DominatorTree *DT, 152 const Loop *CurLoop) const; 153 154 /// Returns true if we could not execute a memory-modifying instruction before 155 /// we enter \p BB under assumption that \p CurLoop is entered. 156 bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) 157 const; 158 159 /// Returns true if we could not execute a memory-modifying instruction before 160 /// we execute \p I under assumption that \p CurLoop is entered. 161 bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop) 162 const; 163 164 /// Inform the safety info that we are planning to insert a new instruction 165 /// \p Inst into the basic block \p BB. It will make all cache updates to keep 166 /// it correct after this insertion. 167 void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB); 168 169 /// Inform safety info that we are planning to remove the instruction \p Inst 170 /// from its block. It will make all cache updates to keep it correct after 171 /// this removal. 172 void removeInstruction(const Instruction *Inst); 173 174 ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {}; 175 176 virtual ~ICFLoopSafetyInfo() {}; 177}; 178 179struct MustBeExecutedContextExplorer; 180 181/// Must be executed iterators visit stretches of instructions that are 182/// guaranteed to be executed together, potentially with other instruction 183/// executed in-between. 184/// 185/// Given the following code, and assuming all statements are single 186/// instructions which transfer execution to the successor (see 187/// isGuaranteedToTransferExecutionToSuccessor), there are two possible 188/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B, 189/// and E. If we start at C or D, we will visit all instructions A-E. 190/// 191/// \code 192/// A; 193/// B; 194/// if (...) { 195/// C; 196/// D; 197/// } 198/// E; 199/// \endcode 200/// 201/// 202/// Below is the example extneded with instructions F and G. Now we assume F 203/// might not transfer execution to it's successor G. As a result we get the 204/// following visit sets: 205/// 206/// Start Instruction | Visit Set 207/// A | A, B, E, F 208/// B | A, B, E, F 209/// C | A, B, C, D, E, F 210/// D | A, B, C, D, E, F 211/// E | A, B, E, F 212/// F | A, B, E, F 213/// G | A, B, E, F, G 214/// 215/// 216/// \code 217/// A; 218/// B; 219/// if (...) { 220/// C; 221/// D; 222/// } 223/// E; 224/// F; // Might not transfer execution to its successor G. 225/// G; 226/// \endcode 227/// 228/// 229/// A more complex example involving conditionals, loops, break, and continue 230/// is shown below. We again assume all instructions will transmit control to 231/// the successor and we assume we can prove the inner loop to be finite. We 232/// omit non-trivial branch conditions as the exploration is oblivious to them. 233/// Constant branches are assumed to be unconditional in the CFG. The resulting 234/// visist sets are shown in the table below. 235/// 236/// \code 237/// A; 238/// while (true) { 239/// B; 240/// if (...) 241/// C; 242/// if (...) 243/// continue; 244/// D; 245/// if (...) 246/// break; 247/// do { 248/// if (...) 249/// continue; 250/// E; 251/// } while (...); 252/// F; 253/// } 254/// G; 255/// \endcode 256/// 257/// Start Instruction | Visit Set 258/// A | A, B 259/// B | A, B 260/// C | A, B, C 261/// D | A, B, D 262/// E | A, B, D, E, F 263/// F | A, B, D, F 264/// G | A, B, D, G 265/// 266/// 267/// Note that the examples show optimal visist sets but not necessarily the ones 268/// derived by the explorer depending on the available CFG analyses (see 269/// MustBeExecutedContextExplorer). Also note that we, depending on the options, 270/// the visit set can contain instructions from other functions. 271struct MustBeExecutedIterator { 272 /// Type declarations that make his class an input iterator. 273 ///{ 274 typedef const Instruction *value_type; 275 typedef std::ptrdiff_t difference_type; 276 typedef const Instruction **pointer; 277 typedef const Instruction *&reference; 278 typedef std::input_iterator_tag iterator_category; 279 ///} 280 281 using ExplorerTy = MustBeExecutedContextExplorer; 282 283 MustBeExecutedIterator(const MustBeExecutedIterator &Other) 284 : Visited(Other.Visited), Explorer(Other.Explorer), 285 CurInst(Other.CurInst) {} 286 287 MustBeExecutedIterator(MustBeExecutedIterator &&Other) 288 : Visited(std::move(Other.Visited)), Explorer(Other.Explorer), 289 CurInst(Other.CurInst) {} 290 291 MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) { 292 if (this != &Other) { 293 std::swap(Visited, Other.Visited); 294 std::swap(CurInst, Other.CurInst); 295 } 296 return *this; 297 } 298 299 ~MustBeExecutedIterator() {} 300 301 /// Pre- and post-increment operators. 302 ///{ 303 MustBeExecutedIterator &operator++() { 304 CurInst = advance(); 305 return *this; 306 } 307 308 MustBeExecutedIterator operator++(int) { 309 MustBeExecutedIterator tmp(*this); 310 operator++(); 311 return tmp; 312 } 313 ///} 314 315 /// Equality and inequality operators. Note that we ignore the history here. 316 ///{ 317 bool operator==(const MustBeExecutedIterator &Other) const { 318 return CurInst == Other.CurInst; 319 } 320 321 bool operator!=(const MustBeExecutedIterator &Other) const { 322 return !(*this == Other); 323 } 324 ///} 325 326 /// Return the underlying instruction. 327 const Instruction *&operator*() { return CurInst; } 328 const Instruction *getCurrentInst() const { return CurInst; } 329 330 /// Return true if \p I was encountered by this iterator already. 331 bool count(const Instruction *I) const { return Visited.count(I); } 332 333private: 334 using VisitedSetTy = DenseSet<const Instruction *>; 335 336 /// Private constructors. 337 MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I); 338 339 /// Reset the iterator to its initial state pointing at \p I. 340 void reset(const Instruction *I); 341 342 /// Try to advance one of the underlying positions (Head or Tail). 343 /// 344 /// \return The next instruction in the must be executed context, or nullptr 345 /// if none was found. 346 const Instruction *advance(); 347 348 /// A set to track the visited instructions in order to deal with endless 349 /// loops and recursion. 350 VisitedSetTy Visited; 351 352 /// A reference to the explorer that created this iterator. 353 ExplorerTy &Explorer; 354 355 /// The instruction we are currently exposing to the user. There is always an 356 /// instruction that we know is executed with the given program point, 357 /// initially the program point itself. 358 const Instruction *CurInst; 359 360 friend struct MustBeExecutedContextExplorer; 361}; 362 363/// A "must be executed context" for a given program point PP is the set of 364/// instructions, potentially before and after PP, that are executed always when 365/// PP is reached. The MustBeExecutedContextExplorer an interface to explore 366/// "must be executed contexts" in a module through the use of 367/// MustBeExecutedIterator. 368/// 369/// The explorer exposes "must be executed iterators" that traverse the must be 370/// executed context. There is little information sharing between iterators as 371/// the expected use case involves few iterators for "far apart" instructions. 372/// If that changes, we should consider caching more intermediate results. 373struct MustBeExecutedContextExplorer { 374 375 /// In the description of the parameters we use PP to denote a program point 376 /// for which the must be executed context is explored, or put differently, 377 /// for which the MustBeExecutedIterator is created. 378 /// 379 /// \param ExploreInterBlock Flag to indicate if instructions in blocks 380 /// other than the parent of PP should be 381 /// explored. 382 MustBeExecutedContextExplorer( 383 bool ExploreInterBlock, 384 GetterTy<const LoopInfo> LIGetter = 385 [](const Function &) { return nullptr; }, 386 GetterTy<const PostDominatorTree> PDTGetter = 387 [](const Function &) { return nullptr; }) 388 : ExploreInterBlock(ExploreInterBlock), LIGetter(LIGetter), 389 PDTGetter(PDTGetter), EndIterator(*this, nullptr) {} 390 391 /// Clean up the dynamically allocated iterators. 392 ~MustBeExecutedContextExplorer() { 393 DeleteContainerSeconds(InstructionIteratorMap); 394 } 395 396 /// Iterator-based interface. \see MustBeExecutedIterator. 397 ///{ 398 using iterator = MustBeExecutedIterator; 399 using const_iterator = const MustBeExecutedIterator; 400 401 /// Return an iterator to explore the context around \p PP. 402 iterator &begin(const Instruction *PP) { 403 auto *&It = InstructionIteratorMap[PP]; 404 if (!It) 405 It = new iterator(*this, PP); 406 return *It; 407 } 408 409 /// Return an iterator to explore the cached context around \p PP. 410 const_iterator &begin(const Instruction *PP) const { 411 return *InstructionIteratorMap.lookup(PP); 412 } 413 414 /// Return an universal end iterator. 415 ///{ 416 iterator &end() { return EndIterator; } 417 iterator &end(const Instruction *) { return EndIterator; } 418 419 const_iterator &end() const { return EndIterator; } 420 const_iterator &end(const Instruction *) const { return EndIterator; } 421 ///} 422 423 /// Return an iterator range to explore the context around \p PP. 424 llvm::iterator_range<iterator> range(const Instruction *PP) { 425 return llvm::make_range(begin(PP), end(PP)); 426 } 427 428 /// Return an iterator range to explore the cached context around \p PP. 429 llvm::iterator_range<const_iterator> range(const Instruction *PP) const { 430 return llvm::make_range(begin(PP), end(PP)); 431 } 432 ///} 433 434 /// Helper to look for \p I in the context of \p PP. 435 /// 436 /// The context is expanded until \p I was found or no more expansion is 437 /// possible. 438 /// 439 /// \returns True, iff \p I was found. 440 bool findInContextOf(const Instruction *I, const Instruction *PP) { 441 auto EIt = begin(PP), EEnd = end(PP); 442 return findInContextOf(I, EIt, EEnd); 443 } 444 445 /// Helper to look for \p I in the context defined by \p EIt and \p EEnd. 446 /// 447 /// The context is expanded until \p I was found or no more expansion is 448 /// possible. 449 /// 450 /// \returns True, iff \p I was found. 451 bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) { 452 bool Found = EIt.count(I); 453 while (!Found && EIt != EEnd) 454 Found = (++EIt).getCurrentInst() == I; 455 return Found; 456 } 457 458 /// Return the next instruction that is guaranteed to be executed after \p PP. 459 /// 460 /// \param It The iterator that is used to traverse the must be 461 /// executed context. 462 /// \param PP The program point for which the next instruction 463 /// that is guaranteed to execute is determined. 464 const Instruction * 465 getMustBeExecutedNextInstruction(MustBeExecutedIterator &It, 466 const Instruction *PP); 467 468 /// Find the next join point from \p InitBB in forward direction. 469 const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB); 470 471 /// Parameter that limit the performed exploration. See the constructor for 472 /// their meaning. 473 ///{ 474 const bool ExploreInterBlock; 475 ///} 476 477private: 478 /// Getters for common CFG analyses: LoopInfo, DominatorTree, and 479 /// PostDominatorTree. 480 ///{ 481 GetterTy<const LoopInfo> LIGetter; 482 GetterTy<const PostDominatorTree> PDTGetter; 483 ///} 484 485 /// Map to cache isGuaranteedToTransferExecutionToSuccessor results. 486 DenseMap<const BasicBlock *, Optional<bool>> BlockTransferMap; 487 488 /// Map to cache containsIrreducibleCFG results. 489 DenseMap<const Function*, Optional<bool>> IrreducibleControlMap; 490 491 /// Map from instructions to associated must be executed iterators. 492 DenseMap<const Instruction *, MustBeExecutedIterator *> 493 InstructionIteratorMap; 494 495 /// A unique end iterator. 496 MustBeExecutedIterator EndIterator; 497}; 498 499} // namespace llvm 500 501#endif 502