//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This transformation implements the well known scalar replacement of
/// aggregates transformation. It tries to identify promotable elements of an
/// aggregate alloca, and promote them to registers. It will also try to
/// convert uses of an element (or set of elements) of an alloca into a vector
/// or bitfield-style integer scalar if appropriate.
///
/// It works to do this with minimal slicing of the alloca so that regions
/// which are merely transferred in and out of external memory remain unchanged
/// and are not decomposed to scalar code.
///
/// Because this also performs alloca promotion, it can be thought of as also
/// serving the purpose of SSA formation. The algorithm iterates on the
/// function until all opportunities for promotion have been realized.
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);

namespace {
/// \brief A custom IRBuilder inserter which prefixes all names if they are
/// preserved.
template <bool preserveNames = true>
class IRBuilderPrefixedInserter :
    public IRBuilderDefaultInserter<preserveNames> {
  std::string Prefix;

public:
  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

protected:
  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
                    BasicBlock::iterator InsertPt) const {
    IRBuilderDefaultInserter<preserveNames>::InsertHelper(
        I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
  }
};

// Specialization for not preserving the name is trivial.
template <>
class IRBuilderPrefixedInserter<false> :
    public IRBuilderDefaultInserter<false> {
public:
  void SetNamePrefix(const Twine &P) {}
};

/// \brief Provide a typedef for IRBuilder that drops names in release builds.
#ifndef NDEBUG
typedef llvm::IRBuilder<true, ConstantFolder,
                        IRBuilderPrefixedInserter<true> > IRBuilderTy;
#else
typedef llvm::IRBuilder<false, ConstantFolder,
                        IRBuilderPrefixedInserter<false> > IRBuilderTy;
#endif
}

namespace {
/// \brief A common base class for representing a half-open byte range.
struct ByteRange {
  /// \brief The beginning offset of the range.
  uint64_t BeginOffset;

  /// \brief The ending offset, not included in the range.
  uint64_t EndOffset;

  ByteRange() : BeginOffset(), EndOffset() {}
  ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
      : BeginOffset(BeginOffset), EndOffset(EndOffset) {}

  /// \brief Support for ordering ranges.
  ///
  /// This provides an ordering over ranges such that start offsets are
  /// always increasing, and within equal start offsets, the end offsets are
  /// decreasing. Thus the spanning range comes first in a cluster with the
  /// same start position.
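  /// For example, [0,16) sorts before [0,8), which in turn sorts before
  /// [4,12).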
  bool operator<(const ByteRange &RHS) const {
    if (BeginOffset < RHS.BeginOffset) return true;
    if (BeginOffset > RHS.BeginOffset) return false;
    if (EndOffset > RHS.EndOffset) return true;
    return false;
  }

  /// \brief Support comparison with a single offset to allow binary searches.
  friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
    return LHS.BeginOffset < RHSOffset;
  }

  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
                                              const ByteRange &RHS) {
    return LHSOffset < RHS.BeginOffset;
  }

  bool operator==(const ByteRange &RHS) const {
    return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
  }
  bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
};

/// \brief A partition of an alloca.
///
/// This structure represents a contiguous partition of the alloca. These are
/// formed by examining the uses of the alloca. During formation, they may
/// overlap but once an AllocaPartitioning is built, the Partitions within it
/// are all disjoint.
struct Partition : public ByteRange {
  /// \brief Whether this partition is splittable into smaller partitions.
  ///
  /// We flag partitions as splittable when they are formed entirely due to
  /// accesses by trivially splittable operations such as memset and memcpy.
  bool IsSplittable;

  /// \brief Test whether a partition has been marked as dead.
  bool isDead() const {
    if (BeginOffset == UINT64_MAX) {
      assert(EndOffset == UINT64_MAX);
      return true;
    }
    return false;
  }

  /// \brief Kill a partition.
  /// This is accomplished by setting both its beginning and end offset to
  /// the maximum possible value.
  void kill() {
    assert(!isDead() && "He's Dead, Jim!");
    BeginOffset = EndOffset = UINT64_MAX;
  }

  Partition() : ByteRange(), IsSplittable() {}
  Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
      : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
};

/// \brief A particular use of a partition of the alloca.
///
/// This structure is used to associate uses of a partition with it. They
/// mark the range of bytes which are referenced by a particular instruction,
/// and include a handle to the user itself and the pointer value in use.
/// The bounds of these uses are determined by intersecting the bounds of the
/// memory use itself with a particular partition. As a consequence there is
/// intentional overlap between various uses of the same partition.
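///
/// For example, a memset covering bytes [0,16) of an alloca that has been
/// split into partitions [0,8) and [8,16) yields one PartitionUse per
/// partition, each clamped to that partition's bounds.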
class PartitionUse : public ByteRange {
  /// \brief Combined storage for both the Use* and split state.
  PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;

public:
  PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
  PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
               bool IsSplit)
      : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}

  /// \brief The use in question. Provides access to both user and used value.
  ///
  /// Note that this may be null if the partition use is *dead*, that is, it
  /// should be ignored.
  Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }

  /// \brief Set the use for this partition use range.
  void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }

  /// \brief Whether this use is split across multiple partitions.
  bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
};
}

namespace llvm {
template <> struct isPodLike<Partition> : llvm::true_type {};
template <> struct isPodLike<PartitionUse> : llvm::true_type {};
}

namespace {
/// \brief Alloca partitioning representation.
///
/// This class represents a partitioning of an alloca into slices, and
/// information about the nature of uses of each slice of the alloca. The goal
/// is that this information is sufficient to decide if and how to split the
/// alloca apart and replace slices with scalars. It is also intended that this
/// structure can capture the relevant information needed both to decide about
/// and to enact these transformations.
class AllocaPartitioning {
public:
  /// \brief Construct a partitioning of a particular alloca.
  ///
  /// Construction does most of the work for partitioning the alloca. This
  /// performs the necessary walks of users and builds a partitioning from it.
  AllocaPartitioning(const DataLayout &TD, AllocaInst &AI);

  /// \brief Test whether a pointer to the allocation escapes our analysis.
  ///
  /// If this is true, the partitioning is never fully built and should be
  /// ignored.
  bool isEscaped() const { return PointerEscapingInstr; }

  /// \brief Support for iterating over the partitions.
  /// @{
  typedef SmallVectorImpl<Partition>::iterator iterator;
  iterator begin() { return Partitions.begin(); }
  iterator end() { return Partitions.end(); }

  typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
  const_iterator begin() const { return Partitions.begin(); }
  const_iterator end() const { return Partitions.end(); }
  /// @}

  /// \brief Support for iterating over and manipulating a particular
  /// partition's uses.
  ///
  /// The iteration support provided for uses is more limited, but also
  /// includes some manipulation routines to support rewriting the uses of
  /// partitions during SROA.
  /// @{
  typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
  use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
  use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
  use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
  use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }

  typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
  const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
  const_use_iterator use_begin(const_iterator I) const {
    return Uses[I - begin()].begin();
  }
  const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
  const_use_iterator use_end(const_iterator I) const {
    return Uses[I - begin()].end();
  }

  unsigned use_size(unsigned Idx) const { return Uses[Idx].size(); }
  unsigned use_size(const_iterator I) const { return Uses[I - begin()].size(); }
  const PartitionUse &getUse(unsigned PIdx, unsigned UIdx) const {
    return Uses[PIdx][UIdx];
  }
  const PartitionUse &getUse(const_iterator I, unsigned UIdx) const {
    return Uses[I - begin()][UIdx];
  }

  void use_push_back(unsigned Idx, const PartitionUse &PU) {
    Uses[Idx].push_back(PU);
  }
  void use_push_back(const_iterator I, const PartitionUse &PU) {
    Uses[I - begin()].push_back(PU);
  }
  /// @}

  /// \brief Allow iterating the dead users for this alloca.
  ///
  /// These are instructions which will never actually use the alloca as they
  /// are outside the allocated range. They are safe to replace with undef and
  /// delete.
  /// @{
  typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
  dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
  dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
  /// @}

  /// \brief Allow iterating the dead expressions referring to this alloca.
  ///
  /// These are operands which cannot actually be used to refer to the
  /// alloca as they are outside its range and the user doesn't correct for
  /// that. These mostly consist of PHI node inputs and the like which we just
  /// need to replace with undef.
  /// @{
  typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
  dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
  dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
  /// @}

  /// \brief MemTransferInst auxiliary data.
  /// This struct provides some auxiliary data about memory transfer
  /// intrinsics such as memcpy and memmove. These intrinsics can use two
  /// different ranges within the same alloca, and provide other challenges to
  /// correctly represent. We stash extra data to help us untangle this
  /// after the partitioning is complete.
  struct MemTransferOffsets {
    /// The destination begin and end offsets when the destination is within
    /// this alloca. If the end offset is zero the destination is not within
    /// this alloca.
    uint64_t DestBegin, DestEnd;

    /// The source begin and end offsets when the source is within this alloca.
    /// If the end offset is zero, the source is not within this alloca.
    uint64_t SourceBegin, SourceEnd;

    /// Flag for whether this transfer is splittable.
    bool IsSplittable;
  };
  MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
    return MemTransferInstData.lookup(&II);
  }

  /// \brief Map from a PHI or select operand back to a partition.
  ///
  /// When manipulating PHI nodes or selects, they can use more than one
  /// partition of an alloca. We store a special mapping to allow finding the
  /// partition referenced by each of these operands, if any.
  iterator findPartitionForPHIOrSelectOperand(Use *U) {
    SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
      = PHIOrSelectOpMap.find(U);
    if (MapIt == PHIOrSelectOpMap.end())
      return end();

    return begin() + MapIt->second.first;
  }

  /// \brief Map from a PHI or select operand back to the specific use of
  /// a partition.
  ///
  /// Similar to mapping these operands back to the partitions, this maps
  /// directly to the use structure of that partition.
  use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
    SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
      = PHIOrSelectOpMap.find(U);
    assert(MapIt != PHIOrSelectOpMap.end());
    return Uses[MapIt->second.first].begin() + MapIt->second.second;
  }

  /// \brief Compute a common type among the uses of a particular partition.
  ///
  /// This routine walks all of the uses of a particular partition and tries
  /// to find a common type between them. Untyped operations such as memset and
  /// memcpy are ignored.
  Type *getCommonType(iterator I) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printUsers(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;
  class PartitionBuilder;
  friend class AllocaPartitioning::PartitionBuilder;
  class UseBuilder;
  friend class AllocaPartitioning::UseBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// \brief Handle to alloca instruction to simplify method interfaces.
  AllocaInst &AI;
#endif

  /// \brief The instruction responsible for this alloca having no partitioning.
  ///
  /// When an instruction (potentially) escapes the pointer to the alloca, we
  /// store a pointer to that here and abort trying to partition the alloca.
  /// This will be null if the alloca is partitioned successfully.
  Instruction *PointerEscapingInstr;

  /// \brief The partitions of the alloca.
  ///
  /// We store a vector of the partitions over the alloca here. This vector is
  /// sorted by increasing begin offset, and then by decreasing end offset. See
  /// the Partition inner class for more details. Initially (during
  /// construction) there are overlaps, but we form a disjoint sequence of
  /// partitions while finishing construction and a fully constructed object is
  /// expected to always have this as a disjoint space.
  SmallVector<Partition, 8> Partitions;

  /// \brief The uses of the partitions.
  ///
  /// This is essentially a mapping from each partition to a list of uses of
  /// that partition.
  /// The mapping is done with a Uses vector that has the exact same number of
  /// entries as the partition vector. Each entry is itself a vector of the
  /// uses.
  SmallVector<SmallVector<PartitionUse, 2>, 8> Uses;

  /// \brief Instructions which will become dead if we rewrite the alloca.
  ///
  /// Note that these are not separated by partition. This is because we expect
  /// a partitioned alloca to be completely rewritten or not rewritten at all.
  /// If rewritten, all these instructions can simply be removed and replaced
  /// with undef as they come from outside of the allocated space.
  SmallVector<Instruction *, 8> DeadUsers;

  /// \brief Operands which will become dead if we rewrite the alloca.
  ///
  /// These are operands that in their particular use can be replaced with
  /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
  /// to PHI nodes and the like. They aren't entirely dead (there might be
  /// a GEP back into the bounds using it elsewhere), and nor is the PHI, but
  /// we want to swap this particular input for undef to simplify the use lists
  /// of the alloca.
  SmallVector<Use *, 8> DeadOperands;

  /// \brief The underlying storage for auxiliary memcpy and memset info.
  SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData;

  /// \brief A side data structure used when building up the partitions and
  /// uses.
  ///
  /// This mapping is only really used during the initial building of the
  /// partitioning so that we can retain information about PHI and select nodes
  /// processed.
  SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;

  /// \brief Auxiliary information for particular PHI or select operands.
  SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;

  /// \brief A utility routine called from the constructor.
  ///
  /// This does what it says on the tin. It is the key to the alloca partition
  /// splitting and merging. After it is called we have the desired disjoint
  /// collection of partitions.
  void splitAndMergePartitions();
};
}

static Value *foldSelectInst(SelectInst &SI) {
  // If the condition being selected on is a constant or the same value is
  // being selected between, fold the select. Yes this does (rarely) happen
  // early on.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1+CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return 0;
}

/// \brief Builder for the alloca partitioning.
///
/// This class builds an alloca partitioning by recursively visiting the uses
/// of an alloca and splitting the partitions for each load and store at each
/// offset.
class AllocaPartitioning::PartitionBuilder
    : public PtrUseVisitor<PartitionBuilder> {
  friend class PtrUseVisitor<PartitionBuilder>;
  friend class InstVisitor<PartitionBuilder>;
  typedef PtrUseVisitor<PartitionBuilder> Base;

  const uint64_t AllocSize;
  AllocaPartitioning &P;

  SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;

public:
  PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
      : PtrUseVisitor<PartitionBuilder>(DL),
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
        P(P) {}

private:
  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
                 bool IsSplittable = false) {
    // Completely skip uses which have a zero size or start either before or
    // past the end of the allocation.
    if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
                   << " which has zero size or starts outside of the "
                   << AllocSize << " byte alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << I << "\n");
      return;
    }

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    // Clamp the end offset to the end of the allocation. Note that this is
    // formulated to handle even the case where "BeginOffset + Size" overflows.
    // This may appear superficially to be something we could ignore entirely,
    // but that is not so! There may be widened loads or PHI-node uses where
    // some instructions are dead but not others. We can't completely ignore
    // them, and so have to record at least the information here.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset) {
      DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
                   << " to remain within the " << AllocSize << " byte alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << I << "\n");
      EndOffset = AllocSize;
    }

    Partition New(BeginOffset, EndOffset, IsSplittable);
    P.Partitions.push_back(New);
  }

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {
    // We allow splitting of loads and stores where the type is an integer type
    // and the access covers the entire alloca. This prevents us from splitting
    // over-eagerly.
    // FIXME: In the great blue eventually, we should eagerly split all integer
    // loads and stores, and then have a separate step that merges adjacent
    // alloca partitions into a single partition suitable for integer widening.
    // Or we should skip the merge step and rely on GVN and other passes to
    // merge adjacent loads and stores that survive mem2reg.
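    // Illustrative example: for an 8-byte alloca, a non-volatile i64 load at
    // offset 0 is treated as splittable here, while an i32 load of just the
    // first four bytes is not.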
    bool IsSplittable =
      Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;

    insertUse(I, Offset, Size, IsSplittable);
  }

  void visitLoadInst(LoadInst &LI) {
    assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
           "All simple FCA loads should have been pre-split");

    if (!IsOffsetKnown)
      return PI.setAborted(&LI);

    uint64_t Size = DL.getTypeStoreSize(LI.getType());
    return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
  }

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
    if (ValOp == *U)
      return PI.setEscapedAndAborted(&SI);
    if (!IsOffsetKnown)
      return PI.setAborted(&SI);

    uint64_t Size = DL.getTypeStoreSize(ValOp->getType());

    // If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply
    // ignore it. Note that this is more strict than the generic clamping
    // behavior of insertUse. We also try to handle cases which might run the
    // risk of overflow.
    // FIXME: We should instead consider the pointer to have escaped if this
    // function is being instrumented for addressing bugs or race conditions.
    if (Offset.isNegative() || Size > AllocSize ||
        Offset.ugt(AllocSize - Size)) {
      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
                   << " which extends past the end of the " << AllocSize
                   << " byte alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << SI << "\n");
      return;
    }

    assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
           "All simple FCA stores should have been pre-split");
    handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
  }

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return;

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    insertUse(II, Offset,
              Length ? Length->getLimitedValue()
                     : AllocSize - Offset.getLimitedValue(),
              (bool)Length);
  }

  void visitMemTransferInst(MemTransferInst &II) {
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return;

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue()
                           : AllocSize - RawOffset;

    MemTransferOffsets &Offsets = P.MemTransferInstData[&II];

    // Only intrinsics with a constant length can be split.
    Offsets.IsSplittable = Length;

    if (*U == II.getRawDest()) {
      Offsets.DestBegin = RawOffset;
      Offsets.DestEnd = RawOffset + Size;
    }
    if (*U == II.getRawSource()) {
      Offsets.SourceBegin = RawOffset;
      Offsets.SourceEnd = RawOffset + Size;
    }

    // If we have set up end offsets for both the source and the destination,
    // we have found both sides of this transfer pointing at the same alloca.
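    // Illustrative example: a memcpy whose source and destination are both
    // derived from this alloca is visited once per pointer operand; only the
    // second visit observes both end offsets as non-zero.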
    bool SeenBothEnds = Offsets.SourceEnd && Offsets.DestEnd;
    if (SeenBothEnds && II.getRawDest() != II.getRawSource()) {
      unsigned PrevIdx = MemTransferPartitionMap[&II];

      // Check if the begin offsets match and this is a non-volatile transfer.
      // In that case, we can completely elide the transfer.
      if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
        P.Partitions[PrevIdx].kill();
        return;
      }

      // Otherwise we have an offset transfer within the same alloca. We can't
      // split those.
      P.Partitions[PrevIdx].IsSplittable = Offsets.IsSplittable = false;
    } else if (SeenBothEnds) {
      // Handle the case where this exact use provides both ends of the
      // operation.
      assert(II.getRawDest() == II.getRawSource());

      // For non-volatile transfers this is a no-op.
      if (!II.isVolatile())
        return;

      // Otherwise just suppress splitting.
      Offsets.IsSplittable = false;
    }

    // Insert the use now that we've fixed up the splittable nature.
    insertUse(II, Offset, Size, Offsets.IsSplittable);

    // Set up the mapping from intrinsic to partition if we've not seen both
    // ends of this transfer.
    if (!SeenBothEnds) {
      unsigned NewIdx = P.Partitions.size() - 1;
      bool Inserted
        = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)).second;
      assert(Inserted &&
             "Already have intrinsic in map but haven't seen both ends");
      (void)Inserted;
    }
  }

  // Disable SRoA for any intrinsics except for lifetime invariants.
  // FIXME: What about debug intrinsics? This matches old behavior, but
  // doesn't make sense.
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
        II.getIntrinsicID() == Intrinsic::lifetime_end) {
      ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
      uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
                               Length->getLimitedValue());
      insertUse(II, Offset, Size, true);
      return;
    }

    Base::visitIntrinsicInst(II);
  }

  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    // We consider any PHI or select that results in a direct load or store of
    // the same offset to be a viable use for partitioning purposes. These uses
    // are considered unsplittable and the size is the maximum loaded or stored
    // size.
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
    Visited.insert(Root);
    Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
    // If there are no loads or stores, the access is dead. We mark that as
    // a size zero access.
    Size = 0;
    do {
      Instruction *I, *UsedI;
      llvm::tie(UsedI, I) = Uses.pop_back_val();

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
        continue;
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        Value *Op = SI->getOperand(0);
        if (Op == UsedI)
          return SI;
        Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
        continue;
      }

      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        if (!GEP->hasAllZeroIndices())
          return GEP;
      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
                 !isa<SelectInst>(I)) {
        return I;
      }

      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
           ++UI)
        if (Visited.insert(cast<Instruction>(*UI)))
          Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
    } while (!Uses.empty());

    return 0;
  }

  void visitPHINode(PHINode &PN) {
    if (PN.use_empty())
      return;
    if (!IsOffsetKnown)
      return PI.setAborted(&PN);

    // See if we already have computed info on this node.
    std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
    if (PHIInfo.first) {
      PHIInfo.second = true;
      insertUse(PN, Offset, PHIInfo.first);
      return;
    }

    // Check for an unsafe use of the PHI node.
    if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
      return PI.setAborted(UnsafeI);

    insertUse(PN, Offset, PHIInfo.first);
  }

  void visitSelectInst(SelectInst &SI) {
    if (SI.use_empty())
      return;
    if (Value *Result = foldSelectInst(SI)) {
      if (Result == *U)
        // If the result of the constant fold will be the pointer, recurse
        // through the select as if we had RAUW'ed it.
        enqueueUsers(SI);

      return;
    }
    if (!IsOffsetKnown)
      return PI.setAborted(&SI);

    // See if we already have computed info on this node.
    std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
    if (SelectInfo.first) {
      SelectInfo.second = true;
      insertUse(SI, Offset, SelectInfo.first);
      return;
    }

    // Check for an unsafe use of the select instruction.
    if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
      return PI.setAborted(UnsafeI);

    insertUse(SI, Offset, SelectInfo.first);
  }

  /// \brief Disable SROA entirely if there are unhandled users of the alloca.
  void visitInstruction(Instruction &I) {
    PI.setAborted(&I);
  }
};

/// \brief Use adder for the alloca partitioning.
///
/// This class adds the uses of an alloca to all of the partitions which they
/// use. For splittable partitions, this can end up doing essentially a linear
/// walk of the partitions, but the number of steps remains bounded by the
/// total result instruction size:
/// - The number of partitions is a result of the number of unsplittable
///   instructions using the alloca.
/// - The number of users of each partition is at worst the total number of
///   splittable instructions using the alloca.
/// Thus we will produce N * M instructions in the end, where N are the number
/// of unsplittable uses and M are the number of splittable. This visitor does
/// the exact same number of updates to the partitioning.
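/// For example, a single splittable memcpy spanning an alloca that ends up
/// with three disjoint partitions contributes one PartitionUse to each of
/// the three.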
///
/// In the more common case, this visitor will leverage the fact that the
/// partition space is pre-sorted, and do a logarithmic search for the
/// partition needed, making the total visit a classical ((N + M) * log(N))
/// complexity operation.
class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
  friend class PtrUseVisitor<UseBuilder>;
  friend class InstVisitor<UseBuilder>;
  typedef PtrUseVisitor<UseBuilder> Base;

  const uint64_t AllocSize;
  AllocaPartitioning &P;

  /// \brief Set to de-duplicate dead instructions found in the use walk.
  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;

public:
  UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
      : PtrUseVisitor<UseBuilder>(TD),
        AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
        P(P) {}

private:
  void markAsDead(Instruction &I) {
    if (VisitedDeadInsts.insert(&I))
      P.DeadUsers.push_back(&I);
  }

  void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
    // If the use has a zero size or extends outside of the allocation, record
    // it as a dead use for elimination later.
    if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
      return markAsDead(User);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    // Clamp the end offset to the end of the allocation. Note that this is
    // formulated to handle even the case where "BeginOffset + Size" overflows.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset)
      EndOffset = AllocSize;

    // NB: This only works if we have zero overlapping partitions.
    iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
    if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
      I = llvm::prior(I);
    iterator E = P.end();
    bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
    for (; I != E && I->BeginOffset < EndOffset; ++I) {
      PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
                         std::min(I->EndOffset, EndOffset), U, IsSplit);
      P.use_push_back(I, NewPU);
      if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
        P.PHIOrSelectOpMap[U]
          = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
    }
  }

  void visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      return markAsDead(BC);

    return Base::visitBitCastInst(BC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
  }

  void visitLoadInst(LoadInst &LI) {
    assert(IsOffsetKnown);
    uint64_t Size = DL.getTypeStoreSize(LI.getType());
    insertUse(LI, Offset, Size);
  }

  void visitStoreInst(StoreInst &SI) {
    assert(IsOffsetKnown);
    uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());

    // If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply
    // ignore it. Note that this is more strict than the generic clamping
    // behavior of insertUse.
    if (Offset.isNegative() || Size > AllocSize ||
        Offset.ugt(AllocSize - Size))
      return markAsDead(SI);

    insertUse(SI, Offset, Size);
  }

  void visitMemSetInst(MemSetInst &II) {
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
      return markAsDead(II);

    assert(IsOffsetKnown);
    insertUse(II, Offset, Length ? Length->getLimitedValue()
                                 : AllocSize - Offset.getLimitedValue());
  }

  void visitMemTransferInst(MemTransferInst &II) {
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
      return markAsDead(II);

    assert(IsOffsetKnown);
    uint64_t Size = Length ? Length->getLimitedValue()
                           : AllocSize - Offset.getLimitedValue();

    const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
    if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
        Offsets.DestBegin == Offsets.SourceBegin)
      return markAsDead(II); // Skip identity transfers without side-effects.

    insertUse(II, Offset, Size);
  }

  void visitIntrinsicInst(IntrinsicInst &II) {
    assert(IsOffsetKnown);
    assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
           II.getIntrinsicID() == Intrinsic::lifetime_end);

    ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
    insertUse(II, Offset, std::min(Length->getLimitedValue(),
                                   AllocSize - Offset.getLimitedValue()));
  }

  void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
    uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;

    // For PHI and select operands outside the alloca, we can't nuke the entire
    // phi or select -- the other side might still be relevant, so we special
    // case them here and use a separate structure to track the operands
    // themselves which should be replaced with undef.
    if ((Offset.isNegative() && Offset.uge(Size)) ||
        (!Offset.isNegative() && Offset.uge(AllocSize))) {
      P.DeadOperands.push_back(U);
      return;
    }

    insertUse(User, Offset, Size);
  }

  void visitPHINode(PHINode &PN) {
    if (PN.use_empty())
      return markAsDead(PN);

    assert(IsOffsetKnown);
    insertPHIOrSelect(PN, Offset);
  }

  void visitSelectInst(SelectInst &SI) {
    if (SI.use_empty())
      return markAsDead(SI);

    if (Value *Result = foldSelectInst(SI)) {
      if (Result == *U)
        // If the result of the constant fold will be the pointer, recurse
        // through the select as if we had RAUW'ed it.
        enqueueUsers(SI);
      else
        // Otherwise the operand to the select is dead, and we can replace it
        // with undef.
        P.DeadOperands.push_back(U);

      return;
    }

    assert(IsOffsetKnown);
    insertPHIOrSelect(SI, Offset);
  }

  /// \brief Unreachable, we've already visited the alloca once.
  void visitInstruction(Instruction &I) {
    llvm_unreachable("Unhandled instruction in use builder.");
  }
};

void AllocaPartitioning::splitAndMergePartitions() {
  size_t NumDeadPartitions = 0;

  // Track the range of splittable partitions that we pass when accumulating
  // overlapping unsplittable partitions.
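  // Illustrative example: a splittable partition [0,16) overlapping an
  // unsplittable partition [4,8) is rewritten below as splittable pieces
  // [0,4) and [8,16) around the unsplittable [4,8).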
  uint64_t SplitEndOffset = 0ull;

  Partition New(0ull, 0ull, false);

  for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) {
    ++j;

    if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) {
      assert(New.BeginOffset == New.EndOffset);
      New = Partitions[i];
    } else {
      assert(New.IsSplittable);
      New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset);
    }
    assert(New.BeginOffset != New.EndOffset);

    // Scan the overlapping partitions.
    while (j != e && New.EndOffset > Partitions[j].BeginOffset) {
      // If the new partition we are forming is splittable, stop at the first
      // unsplittable partition.
      if (New.IsSplittable && !Partitions[j].IsSplittable)
        break;

      // Grow the new partition to include any equally splittable range. 'j' is
      // always equally splittable when New is splittable, but when New is not
      // splittable, we may subsume some (or part of some) splittable partition
      // without growing the new one.
      if (New.IsSplittable == Partitions[j].IsSplittable) {
        New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset);
      } else {
        assert(!New.IsSplittable);
        assert(Partitions[j].IsSplittable);
        SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset);
      }

      Partitions[j].kill();
      ++NumDeadPartitions;
      ++j;
    }

    // If the new partition is splittable, chop off the end as soon as the
    // unsplittable subsequent partition starts and ensure we eventually cover
    // the splittable area.
    if (j != e && New.IsSplittable) {
      SplitEndOffset = std::max(SplitEndOffset, New.EndOffset);
      New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
    }

    // Add the new partition if it differs from the original one and is
    // non-empty. We can end up with an empty partition here if it was
    // splittable but there is an unsplittable one that starts at the same
    // offset.
    if (New != Partitions[i]) {
      if (New.BeginOffset != New.EndOffset)
        Partitions.push_back(New);
      // Mark the old one for removal.
      Partitions[i].kill();
      ++NumDeadPartitions;
    }

    New.BeginOffset = New.EndOffset;
    if (!New.IsSplittable) {
      New.EndOffset = std::max(New.EndOffset, SplitEndOffset);
      if (j != e && !Partitions[j].IsSplittable)
        New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset);
      New.IsSplittable = true;
      // If there is a trailing splittable partition which won't be fused into
      // the next splittable partition go ahead and add it onto the partitions
      // list.
      if (New.BeginOffset < New.EndOffset &&
          (j == e || !Partitions[j].IsSplittable ||
           New.EndOffset < Partitions[j].BeginOffset)) {
        Partitions.push_back(New);
        New.BeginOffset = New.EndOffset = 0ull;
      }
    }
  }

  // Re-sort the partitions now that they have been split and merged into a
  // disjoint set of partitions. Also remove any of the dead partitions we've
  // replaced in the process.
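  // Dead partitions have BeginOffset == EndOffset == UINT64_MAX, so after the
  // sort they form a contiguous tail that can be erased in one step.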
  std::sort(Partitions.begin(), Partitions.end());
  if (NumDeadPartitions) {
    assert(Partitions.back().isDead());
    assert((ptrdiff_t)NumDeadPartitions ==
           std::count(Partitions.begin(), Partitions.end(), Partitions.back()));
  }
  Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end());
}

AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
    :
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      AI(AI),
#endif
      PointerEscapingInstr(0) {
  PartitionBuilder PB(TD, AI, *this);
  PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    // FIXME: We should sink the escape vs. abort info into the caller nicely,
    // possibly by just storing the PtrInfo in the AllocaPartitioning.
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }

  // Sort the uses. This arranges for the offsets to be in ascending order,
  // and the sizes to be in descending order.
  std::sort(Partitions.begin(), Partitions.end());

  // Remove any partitions from the back which are marked as dead.
  while (!Partitions.empty() && Partitions.back().isDead())
    Partitions.pop_back();

  if (Partitions.size() > 1) {
    // Intersect splittability for all partitions with equal offsets and sizes.
    // Then remove all but the first so that we have a sequence of non-equal but
    // potentially overlapping partitions.
    for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E;
         I = J) {
      ++J;
      while (J != E && *I == *J) {
        I->IsSplittable &= J->IsSplittable;
        ++J;
      }
    }
    Partitions.erase(std::unique(Partitions.begin(), Partitions.end()),
                     Partitions.end());

    // Split splittable and merge unsplittable partitions into a disjoint set
    // of partitions over the used space of the allocation.
    splitAndMergePartitions();
  }

  // Record how many partitions we end up with.
  NumAllocaPartitions += Partitions.size();
  MaxPartitionsPerAlloca =
    std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);

  // Now build up the user lists for each of these disjoint partitions by
  // re-walking the recursive users of the alloca.
  Uses.resize(Partitions.size());
  UseBuilder UB(TD, AI, *this);
  PtrI = UB.visitPtr(AI);
  assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
  assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");

  unsigned NumUses = 0;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
    NumUses += Uses[Idx].size();
#endif
  NumAllocaPartitionUses += NumUses;
  MaxPartitionUsesPerAlloca =
    std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
}

Type *AllocaPartitioning::getCommonType(iterator I) const {
  Type *Ty = 0;
  for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
    Use *U = UI->getUse();
    if (!U)
      continue; // Skip dead uses.
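    // Untyped intrinsic users (memset, memcpy, lifetime markers) don't
    // constrain the common type, so skip them.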
    if (isa<IntrinsicInst>(*U->getUser()))
      continue;
    if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
      continue;

    Type *UserTy = 0;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
      UserTy = LI->getType();
    else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
      UserTy = SI->getValueOperand()->getType();
    else
      return 0; // Bail if we have weird uses.

    if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
      // If the type is larger than the partition, skip it. We only encounter
      // this for split integer operations where we want to use the type of the
      // entity causing the split.
      if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
        continue;

      // If we have found an integer type use covering the alloca, use that
      // regardless of the other types, as integers are often used for
      // a "bucket of bits" type.
      return ITy;
    }

    if (Ty && Ty != UserTy)
      return 0;

    Ty = UserTy;
  }
  return Ty;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
                               StringRef Indent) const {
  OS << Indent << "partition #" << (I - begin())
     << " [" << I->BeginOffset << "," << I->EndOffset << ")"
     << (I->IsSplittable ? " (splittable)" : "")
     << (Uses[I - begin()].empty() ? " (zero uses)" : "")
     << "\n";
}

void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
                                    StringRef Indent) const {
  for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
    if (!UI->getUse())
      continue; // Skip dead uses.
    OS << Indent << "  [" << UI->BeginOffset << "," << UI->EndOffset << ") "
       << "used by: " << *UI->getUse()->getUser() << "\n";
    if (MemTransferInst *II =
            dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
      const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
      bool IsDest;
      if (!MTO.IsSplittable)
        IsDest = UI->BeginOffset == MTO.DestBegin;
      else
        IsDest = MTO.DestBegin != 0u;
      OS << Indent << "    (original " << (IsDest ? "dest" : "source") << ": "
         << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin)
         << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n";
    }
  }
}

void AllocaPartitioning::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "No partitioning for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";
    return;
  }

  OS << "Partitioning of alloca: " << AI << "\n";
  for (const_iterator I = begin(), E = end(); I != E; ++I) {
    print(OS, I);
    printUsers(OS, I);
  }
}

void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); }
void AllocaPartitioning::dump() const { print(dbgs()); }

#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


namespace {
/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
///
/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
/// the loads and stores of an alloca instruction, as well as updating its
/// debug information. This is used when a domtree is unavailable and thus
/// mem2reg in its full form can't be used to handle promotion of allocas to
/// scalar values.
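/// (This is also the code path exercised by the hidden -force-ssa-updater
/// flag declared above.)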
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst &AI;
  DIBuilder &DIB;

  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;

public:
  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                 AllocaInst &AI, DIBuilder &DIB)
      : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}

  void run(const SmallVectorImpl<Instruction*> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
      for (Value::use_iterator UI = DebugNode->use_begin(),
                               UE = DebugNode->use_end();
           UI != UE; ++UI)
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
          DDIs.push_back(DDI);
        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
          DVIs.push_back(DVI);
    }

    LoadAndStorePromoter::run(Insts);
    AI.eraseFromParent();
    while (!DDIs.empty())
      DDIs.pop_back_val()->eraseFromParent();
    while (!DVIs.empty())
      DVIs.pop_back_val()->eraseFromParent();
  }

  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == &AI;
    return cast<StoreInst>(I)->getPointerOperand() == &AI;
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
    }
    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      Value *Arg = 0;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // If an argument is zero extended then use the argument directly. The
        // ZExt may be zapped by an optimization pass in the future.
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
        else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(SExt->getOperand(0));
        if (!Arg)
          Arg = SI->getValueOperand();
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Arg = LI->getPointerOperand();
      } else {
        continue;
      }
      Instruction *DbgVal =
        DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
                                    Inst);
      DbgVal->setDebugLoc(DVI->getDebugLoc());
    }
  }
};
} // end anon namespace


namespace {
/// \brief An optimization pass providing Scalar Replacement of Aggregates.
///
/// This pass takes allocations which can be completely analyzed (that is, they
/// don't escape) and tries to turn them into scalar SSA values. There are
/// a few steps to this process.
///
/// 1) It takes allocations of aggregates and analyzes the ways in which they
///    are used to try to split them into smaller allocations, ideally of
///    a single scalar data type. It will split up memcpy and memset accesses
///    as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
///    promotion.
///    This can be replacing a memset with a scalar store of an integer value,
///    or it can involve speculating operations on a PHI or select to be a PHI
///    or select of the results.
/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
///    onto insert and extract operations on a vector value, and convert them to
///    this form. By doing so, it will enable promotion of vector aggregates to
///    SSA vector values.
class SROA : public FunctionPass {
  const bool RequiresDomTree;

  LLVMContext *C;
  const DataLayout *TD;
  DominatorTree *DT;

  /// \brief Worklist of alloca instructions to simplify.
  ///
  /// Each alloca in the function is added to this. Each new alloca formed gets
  /// added to it as well to recursively simplify unless that alloca can be
  /// directly promoted. Finally, each time we rewrite a use of an alloca other
  /// than the one being actively rewritten, we add it back onto the list if
  /// not already present to ensure it is re-visited.
  SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist;

  /// \brief A collection of instructions to delete.
  /// We try to batch deletions to simplify code and make things a bit more
  /// efficient.
  SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;

  /// \brief Post-promotion worklist.
  ///
  /// Sometimes we discover an alloca which has a high probability of becoming
  /// viable for SROA after a round of promotion takes place. In those cases,
  /// the alloca is enqueued here for re-processing.
  ///
  /// Note that we have to be very careful to clear allocas out of this list in
  /// the event they are deleted.
  SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > PostPromotionWorklist;

  /// \brief A collection of alloca instructions we can directly promote.
  std::vector<AllocaInst *> PromotableAllocas;

public:
  SROA(bool RequiresDomTree = true)
      : FunctionPass(ID), RequiresDomTree(RequiresDomTree),
        C(0), TD(0), DT(0) {
    initializeSROAPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F);
  void getAnalysisUsage(AnalysisUsage &AU) const;

  const char *getPassName() const { return "SROA"; }
  static char ID;

private:
  friend class PHIOrSelectSpeculator;
  friend class AllocaPartitionRewriter;
  friend class AllocaPartitionVectorRewriter;

  bool rewriteAllocaPartition(AllocaInst &AI,
                              AllocaPartitioning &P,
                              AllocaPartitioning::iterator PI);
  bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P);
  bool runOnAlloca(AllocaInst &AI);
  void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
  bool promoteAllocas(Function &F);
};
}

char SROA::ID = 0;

FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
  return new SROA(RequiresDomTree);
}

INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
                    false, false)

namespace {
/// \brief Visitor to speculate PHIs and Selects where possible.
class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
  // Befriend the base class so it can delegate to private visit methods.
1436 friend class llvm::InstVisitor<PHIOrSelectSpeculator>; 1437 1438 const DataLayout &TD; 1439 AllocaPartitioning &P; 1440 SROA &Pass; 1441 1442public: 1443 PHIOrSelectSpeculator(const DataLayout &TD, AllocaPartitioning &P, SROA &Pass) 1444 : TD(TD), P(P), Pass(Pass) {} 1445 1446 /// \brief Visit the users of an alloca partition and rewrite them. 1447 void visitUsers(AllocaPartitioning::const_iterator PI) { 1448 // Note that we need to use an index here as the underlying vector of uses 1449 // may be grown during speculation. However, we never need to re-visit the 1450 // new uses, and so we can use the initial size bound. 1451 for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { 1452 const PartitionUse &PU = P.getUse(PI, Idx); 1453 if (!PU.getUse()) 1454 continue; // Skip dead use. 1455 1456 visit(cast<Instruction>(PU.getUse()->getUser())); 1457 } 1458 } 1459 1460private: 1461 // By default, skip this instruction. 1462 void visitInstruction(Instruction &I) {} 1463 1464 /// PHI instructions that use an alloca and are subsequently loaded can be 1465 /// rewritten to load both input pointers in the pred blocks and then PHI the 1466 /// results, allowing the load of the alloca to be promoted. 1467 /// From this: 1468 /// %P2 = phi [i32* %Alloca, i32* %Other] 1469 /// %V = load i32* %P2 1470 /// to: 1471 /// %V1 = load i32* %Alloca -> will be mem2reg'd 1472 /// ... 1473 /// %V2 = load i32* %Other 1474 /// ... 1475 /// %V = phi [i32 %V1, i32 %V2] 1476 /// 1477 /// We can do this to a select if its only uses are loads and if the operands 1478 /// to the select can be loaded unconditionally. 1479 /// 1480 /// FIXME: This should be hoisted into a generic utility, likely in 1481 /// Transforms/Util/Local.h 1482 bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) { 1483 // For now, we can only do this promotion if the load is in the same block 1484 // as the PHI, and if there are no stores between the phi and load. 1485 // TODO: Allow recursive phi users. 1486 // TODO: Allow stores. 1487 BasicBlock *BB = PN.getParent(); 1488 unsigned MaxAlign = 0; 1489 for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); 1490 UI != UE; ++UI) { 1491 LoadInst *LI = dyn_cast<LoadInst>(*UI); 1492 if (LI == 0 || !LI->isSimple()) return false; 1493 1494 // For now we only allow loads in the same block as the PHI. This is 1495 // a common case that happens when instcombine merges two loads through 1496 // a PHI. 1497 if (LI->getParent() != BB) return false; 1498 1499 // Ensure that there are no instructions between the PHI and the load that 1500 // could store. 1501 for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) 1502 if (BBI->mayWriteToMemory()) 1503 return false; 1504 1505 MaxAlign = std::max(MaxAlign, LI->getAlignment()); 1506 Loads.push_back(LI); 1507 } 1508 1509 // We can only transform this if it is safe to push the loads into the 1510 // predecessor blocks. The only thing to watch out for is that we can't put 1511 // a possibly trapping load in the predecessor if it is a critical edge. 1512 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { 1513 TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); 1514 Value *InVal = PN.getIncomingValue(Idx); 1515 1516 // If the value is produced by the terminator of the predecessor (an 1517 // invoke) or it has side-effects, there is no valid place to put a load 1518 // in the predecessor. 
1519 if (TI == InVal || TI->mayHaveSideEffects()) 1520 return false; 1521 1522 // If the predecessor has a single successor, then the edge isn't 1523 // critical. 1524 if (TI->getNumSuccessors() == 1) 1525 continue; 1526 1527 // If this pointer is always safe to load, or if we can prove that there 1528 // is already a load in the block, then we can move the load to the pred 1529 // block. 1530 if (InVal->isDereferenceablePointer() || 1531 isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD)) 1532 continue; 1533 1534 return false; 1535 } 1536 1537 return true; 1538 } 1539 1540 void visitPHINode(PHINode &PN) { 1541 DEBUG(dbgs() << " original: " << PN << "\n"); 1542 1543 SmallVector<LoadInst *, 4> Loads; 1544 if (!isSafePHIToSpeculate(PN, Loads)) 1545 return; 1546 1547 assert(!Loads.empty()); 1548 1549 Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); 1550 IRBuilderTy PHIBuilder(&PN); 1551 PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), 1552 PN.getName() + ".sroa.speculated"); 1553 1554 // Get the TBAA tag and alignment to use from one of the loads. It doesn't 1555 // matter which one we get and if any differ. 1556 LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); 1557 MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); 1558 unsigned Align = SomeLoad->getAlignment(); 1559 1560 // Rewrite all loads of the PN to use the new PHI. 1561 do { 1562 LoadInst *LI = Loads.pop_back_val(); 1563 LI->replaceAllUsesWith(NewPN); 1564 Pass.DeadInsts.insert(LI); 1565 } while (!Loads.empty()); 1566 1567 // Inject loads into all of the pred blocks. 1568 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { 1569 BasicBlock *Pred = PN.getIncomingBlock(Idx); 1570 TerminatorInst *TI = Pred->getTerminator(); 1571 Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); 1572 Value *InVal = PN.getIncomingValue(Idx); 1573 IRBuilderTy PredBuilder(TI); 1574 1575 LoadInst *Load 1576 = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + 1577 Pred->getName())); 1578 ++NumLoadsSpeculated; 1579 Load->setAlignment(Align); 1580 if (TBAATag) 1581 Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); 1582 NewPN->addIncoming(Load, Pred); 1583 1584 Instruction *Ptr = dyn_cast<Instruction>(InVal); 1585 if (!Ptr) 1586 // No uses to rewrite. 1587 continue; 1588 1589 // Try to lookup and rewrite any partition uses corresponding to this phi 1590 // input. 1591 AllocaPartitioning::iterator PI 1592 = P.findPartitionForPHIOrSelectOperand(InUse); 1593 if (PI == P.end()) 1594 continue; 1595 1596 // Replace the Use in the PartitionUse for this operand with the Use 1597 // inside the load. 1598 AllocaPartitioning::use_iterator UI 1599 = P.findPartitionUseForPHIOrSelectOperand(InUse); 1600 assert(isa<PHINode>(*UI->getUse()->getUser())); 1601 UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); 1602 } 1603 DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); 1604 } 1605 1606 /// Select instructions that use an alloca and are subsequently loaded can be 1607 /// rewritten to load both input pointers and then select between the result, 1608 /// allowing the load of the alloca to be promoted. 
1609 /// From this: 1610 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other 1611 /// %V = load i32* %P2 1612 /// to: 1613 /// %V1 = load i32* %Alloca -> will be mem2reg'd 1614 /// %V2 = load i32* %Other 1615 /// %V = select i1 %cond, i32 %V1, i32 %V2 1616 /// 1617 /// We can do this to a select if its only uses are loads and if the operand 1618 /// to the select can be loaded unconditionally. 1619 bool isSafeSelectToSpeculate(SelectInst &SI, 1620 SmallVectorImpl<LoadInst *> &Loads) { 1621 Value *TValue = SI.getTrueValue(); 1622 Value *FValue = SI.getFalseValue(); 1623 bool TDerefable = TValue->isDereferenceablePointer(); 1624 bool FDerefable = FValue->isDereferenceablePointer(); 1625 1626 for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); 1627 UI != UE; ++UI) { 1628 LoadInst *LI = dyn_cast<LoadInst>(*UI); 1629 if (LI == 0 || !LI->isSimple()) return false; 1630 1631 // Both operands to the select need to be dereferencable, either 1632 // absolutely (e.g. allocas) or at this point because we can see other 1633 // accesses to it. 1634 if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI, 1635 LI->getAlignment(), &TD)) 1636 return false; 1637 if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI, 1638 LI->getAlignment(), &TD)) 1639 return false; 1640 Loads.push_back(LI); 1641 } 1642 1643 return true; 1644 } 1645 1646 void visitSelectInst(SelectInst &SI) { 1647 DEBUG(dbgs() << " original: " << SI << "\n"); 1648 1649 // If the select isn't safe to speculate, just use simple logic to emit it. 1650 SmallVector<LoadInst *, 4> Loads; 1651 if (!isSafeSelectToSpeculate(SI, Loads)) 1652 return; 1653 1654 IRBuilderTy IRB(&SI); 1655 Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; 1656 AllocaPartitioning::iterator PIs[2]; 1657 PartitionUse PUs[2]; 1658 for (unsigned i = 0, e = 2; i != e; ++i) { 1659 PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); 1660 if (PIs[i] != P.end()) { 1661 // If the pointer is within the partitioning, remove the select from 1662 // its uses. We'll add in the new loads below. 1663 AllocaPartitioning::use_iterator UI 1664 = P.findPartitionUseForPHIOrSelectOperand(Ops[i]); 1665 PUs[i] = *UI; 1666 // Clear out the use here so that the offsets into the use list remain 1667 // stable but this use is ignored when rewriting. 1668 UI->setUse(0); 1669 } 1670 } 1671 1672 Value *TV = SI.getTrueValue(); 1673 Value *FV = SI.getFalseValue(); 1674 // Replace the loads of the select with a select of two loads. 1675 while (!Loads.empty()) { 1676 LoadInst *LI = Loads.pop_back_val(); 1677 1678 IRB.SetInsertPoint(LI); 1679 LoadInst *TL = 1680 IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true"); 1681 LoadInst *FL = 1682 IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false"); 1683 NumLoadsSpeculated += 2; 1684 1685 // Transfer alignment and TBAA info if present. 
1686 TL->setAlignment(LI->getAlignment()); 1687 FL->setAlignment(LI->getAlignment()); 1688 if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { 1689 TL->setMetadata(LLVMContext::MD_tbaa, Tag); 1690 FL->setMetadata(LLVMContext::MD_tbaa, Tag); 1691 } 1692 1693 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, 1694 LI->getName() + ".sroa.speculated"); 1695 1696 LoadInst *Loads[2] = { TL, FL }; 1697 for (unsigned i = 0, e = 2; i != e; ++i) { 1698 if (PIs[i] != P.end()) { 1699 Use *LoadUse = &Loads[i]->getOperandUse(0); 1700 assert(PUs[i].getUse()->get() == LoadUse->get()); 1701 PUs[i].setUse(LoadUse); 1702 P.use_push_back(PIs[i], PUs[i]); 1703 } 1704 } 1705 1706 DEBUG(dbgs() << " speculated to: " << *V << "\n"); 1707 LI->replaceAllUsesWith(V); 1708 Pass.DeadInsts.insert(LI); 1709 } 1710 } 1711}; 1712} 1713 1714/// \brief Build a GEP out of a base pointer and indices. 1715/// 1716/// This will return the BasePtr if that is valid, or build a new GEP 1717/// instruction using the IRBuilder if GEP-ing is needed. 1718static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, 1719 SmallVectorImpl<Value *> &Indices) { 1720 if (Indices.empty()) 1721 return BasePtr; 1722 1723 // A single zero index is a no-op, so check for this and avoid building a GEP 1724 // in that case. 1725 if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) 1726 return BasePtr; 1727 1728 return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); 1729} 1730 1731/// \brief Get a natural GEP off of the BasePtr walking through Ty toward 1732/// TargetTy without changing the offset of the pointer. 1733/// 1734/// This routine assumes we've already established a properly offset GEP with 1735/// Indices, and arrived at the Ty type. The goal is to continue to GEP with 1736/// zero-indices down through type layers until we find one the same as 1737/// TargetTy. If we can't find one with the same type, we at least try to use 1738/// one with the same size. If none of that works, we just produce the GEP as 1739/// indicated by Indices to have the correct offset. 1740static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, 1741 Value *BasePtr, Type *Ty, Type *TargetTy, 1742 SmallVectorImpl<Value *> &Indices) { 1743 if (Ty == TargetTy) 1744 return buildGEP(IRB, BasePtr, Indices); 1745 1746 // See if we can descend into a struct and locate a field with the correct 1747 // type. 1748 unsigned NumLayers = 0; 1749 Type *ElementTy = Ty; 1750 do { 1751 if (ElementTy->isPointerTy()) 1752 break; 1753 if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) { 1754 ElementTy = SeqTy->getElementType(); 1755 // Note that we use the default address space as this index is over an 1756 // array or a vector, not a pointer. 1757 Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0))); 1758 } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { 1759 if (STy->element_begin() == STy->element_end()) 1760 break; // Nothing left to descend into. 1761 ElementTy = *STy->element_begin(); 1762 Indices.push_back(IRB.getInt32(0)); 1763 } else { 1764 break; 1765 } 1766 ++NumLayers; 1767 } while (ElementTy != TargetTy); 1768 if (ElementTy != TargetTy) 1769 Indices.erase(Indices.end() - NumLayers, Indices.end()); 1770 1771 return buildGEP(IRB, BasePtr, Indices); 1772} 1773 1774/// \brief Recursively compute indices for a natural GEP. 1775/// 1776/// This is the recursive step for getNaturalGEPWithOffset that walks down the 1777/// element types adding appropriate indices for the GEP. 
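/// As an illustrative example (the types here are hypothetical, not taken from
/// the surrounding code): for Ty of {i32, [4 x i16]}, an Offset of 8, and
/// a TargetTy of i16, the recursion appends the indices 1 and 2, selecting the
/// third element of the embedded array, which sits exactly 8 bytes into the
/// struct.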
1778static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, 1779 Value *Ptr, Type *Ty, APInt &Offset, 1780 Type *TargetTy, 1781 SmallVectorImpl<Value *> &Indices) { 1782 if (Offset == 0) 1783 return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); 1784 1785 // We can't recurse through pointer types. 1786 if (Ty->isPointerTy()) 1787 return 0; 1788 1789 // We try to analyze GEPs over vectors here, but note that these GEPs are 1790 // extremely poorly defined currently. The long-term goal is to remove GEPing 1791 // over a vector from the IR completely. 1792 if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { 1793 unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType()); 1794 if (ElementSizeInBits % 8) 1795 return 0; // GEPs over non-multiple of 8 size vector elements are invalid. 1796 APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); 1797 APInt NumSkippedElements = Offset.sdiv(ElementSize); 1798 if (NumSkippedElements.ugt(VecTy->getNumElements())) 1799 return 0; 1800 Offset -= NumSkippedElements * ElementSize; 1801 Indices.push_back(IRB.getInt(NumSkippedElements)); 1802 return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), 1803 Offset, TargetTy, Indices); 1804 } 1805 1806 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { 1807 Type *ElementTy = ArrTy->getElementType(); 1808 APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); 1809 APInt NumSkippedElements = Offset.sdiv(ElementSize); 1810 if (NumSkippedElements.ugt(ArrTy->getNumElements())) 1811 return 0; 1812 1813 Offset -= NumSkippedElements * ElementSize; 1814 Indices.push_back(IRB.getInt(NumSkippedElements)); 1815 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 1816 Indices); 1817 } 1818 1819 StructType *STy = dyn_cast<StructType>(Ty); 1820 if (!STy) 1821 return 0; 1822 1823 const StructLayout *SL = TD.getStructLayout(STy); 1824 uint64_t StructOffset = Offset.getZExtValue(); 1825 if (StructOffset >= SL->getSizeInBytes()) 1826 return 0; 1827 unsigned Index = SL->getElementContainingOffset(StructOffset); 1828 Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); 1829 Type *ElementTy = STy->getElementType(Index); 1830 if (Offset.uge(TD.getTypeAllocSize(ElementTy))) 1831 return 0; // The offset points into alignment padding. 1832 1833 Indices.push_back(IRB.getInt32(Index)); 1834 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 1835 Indices); 1836} 1837 1838/// \brief Get a natural GEP from a base pointer to a particular offset and 1839/// resulting in a particular type. 1840/// 1841/// The goal is to produce a "natural" looking GEP that works with the existing 1842/// composite types to arrive at the appropriate offset and element type for 1843/// a pointer. TargetTy is the element type the returned GEP should point-to if 1844/// possible. We recurse by decreasing Offset, adding the appropriate index to 1845/// Indices, and setting Ty to the result subtype. 1846/// 1847/// If no natural GEP can be constructed, this function returns null. 1848static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, 1849 Value *Ptr, APInt Offset, Type *TargetTy, 1850 SmallVectorImpl<Value *> &Indices) { 1851 PointerType *Ty = cast<PointerType>(Ptr->getType()); 1852 1853 // Don't consider any GEPs through an i8* as natural unless the TargetTy is 1854 // an i8. 
1855 if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8)) 1856 return 0; 1857 1858 Type *ElementTy = Ty->getElementType(); 1859 if (!ElementTy->isSized()) 1860 return 0; // We can't GEP through an unsized element. 1861 APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); 1862 if (ElementSize == 0) 1863 return 0; // Zero-length arrays can't help us build a natural GEP. 1864 APInt NumSkippedElements = Offset.sdiv(ElementSize); 1865 1866 Offset -= NumSkippedElements * ElementSize; 1867 Indices.push_back(IRB.getInt(NumSkippedElements)); 1868 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 1869 Indices); 1870} 1871 1872/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the 1873/// resulting pointer has PointerTy. 1874/// 1875/// This tries very hard to compute a "natural" GEP which arrives at the offset 1876/// and produces the pointer type desired. Where it cannot, it will try to use 1877/// the natural GEP to arrive at the offset and bitcast to the type. Where that 1878/// fails, it will try to use an existing i8* and GEP to the byte offset and 1879/// bitcast to the type. 1880/// 1881/// The strategy for finding the more natural GEPs is to peel off layers of the 1882/// pointer, walking back through bit casts and GEPs, searching for a base 1883/// pointer from which we can compute a natural GEP with the desired 1884/// properties. The algorithm tries to fold as many constant indices into 1885/// a single GEP as possible, thus making each GEP more independent of the 1886/// surrounding code. 1887static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, 1888 Value *Ptr, APInt Offset, Type *PointerTy) { 1889 // Even though we don't look through PHI nodes, we could be called on an 1890 // instruction in an unreachable block, which may be on a cycle. 1891 SmallPtrSet<Value *, 4> Visited; 1892 Visited.insert(Ptr); 1893 SmallVector<Value *, 4> Indices; 1894 1895 // We may end up computing an offset pointer that has the wrong type. If we 1896 // never are able to compute one directly that has the correct type, we'll 1897 // fall back to it, so keep it around here. 1898 Value *OffsetPtr = 0; 1899 1900 // Remember any i8 pointer we come across to re-use if we need to do a raw 1901 // byte offset. 1902 Value *Int8Ptr = 0; 1903 APInt Int8PtrOffset(Offset.getBitWidth(), 0); 1904 1905 Type *TargetTy = PointerTy->getPointerElementType(); 1906 1907 do { 1908 // First fold any existing GEPs into the offset. 1909 while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { 1910 APInt GEPOffset(Offset.getBitWidth(), 0); 1911 if (!GEP->accumulateConstantOffset(TD, GEPOffset)) 1912 break; 1913 Offset += GEPOffset; 1914 Ptr = GEP->getPointerOperand(); 1915 if (!Visited.insert(Ptr)) 1916 break; 1917 } 1918 1919 // See if we can perform a natural GEP here. 1920 Indices.clear(); 1921 if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, 1922 Indices)) { 1923 if (P->getType() == PointerTy) { 1924 // Zap any offset pointer that we ended up computing in previous rounds. 1925 if (OffsetPtr && OffsetPtr->use_empty()) 1926 if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) 1927 I->eraseFromParent(); 1928 return P; 1929 } 1930 if (!OffsetPtr) { 1931 OffsetPtr = P; 1932 } 1933 } 1934 1935 // Stash this pointer if we've found an i8*. 1936 if (Ptr->getType()->isIntegerTy(8)) { 1937 Int8Ptr = Ptr; 1938 Int8PtrOffset = Offset; 1939 } 1940 1941 // Peel off a layer of the pointer and update the offset appropriately. 
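    // Only bitcasts and non-overridable global aliases are peeled here; both
    // leave the byte offset unchanged, since any constant GEP offsets were
    // already folded into Offset by the loop above.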
1942 if (Operator::getOpcode(Ptr) == Instruction::BitCast) { 1943 Ptr = cast<Operator>(Ptr)->getOperand(0); 1944 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { 1945 if (GA->mayBeOverridden()) 1946 break; 1947 Ptr = GA->getAliasee(); 1948 } else { 1949 break; 1950 } 1951 assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!"); 1952 } while (Visited.insert(Ptr)); 1953 1954 if (!OffsetPtr) { 1955 if (!Int8Ptr) { 1956 Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), 1957 "raw_cast"); 1958 Int8PtrOffset = Offset; 1959 } 1960 1961 OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : 1962 IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), 1963 "raw_idx"); 1964 } 1965 Ptr = OffsetPtr; 1966 1967 // On the off chance we were targeting i8*, guard the bitcast here. 1968 if (Ptr->getType() != PointerTy) 1969 Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); 1970 1971 return Ptr; 1972} 1973 1974/// \brief Test whether we can convert a value from the old to the new type. 1975/// 1976/// This predicate should be used to guard calls to convertValue in order to 1977/// ensure that we only try to convert viable values. The strategy is that we 1978/// will peel off single element struct and array wrappings to get to an 1979/// underlying value, and convert that value. 1980static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { 1981 if (OldTy == NewTy) 1982 return true; 1983 if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) 1984 if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) 1985 if (NewITy->getBitWidth() >= OldITy->getBitWidth()) 1986 return true; 1987 if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) 1988 return false; 1989 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) 1990 return false; 1991 1992 if (NewTy->isPointerTy() || OldTy->isPointerTy()) { 1993 if (NewTy->isPointerTy() && OldTy->isPointerTy()) 1994 return true; 1995 if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) 1996 return true; 1997 return false; 1998 } 1999 2000 return true; 2001} 2002 2003/// \brief Generic routine to convert an SSA value to a value of a different 2004/// type. 2005/// 2006/// This will try various different casting techniques, such as bitcasts, 2007/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test 2008/// two types for viability with this routine. 2009static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, 2010 Type *Ty) { 2011 assert(canConvertValue(DL, V->getType(), Ty) && 2012 "Value not convertable to type"); 2013 if (V->getType() == Ty) 2014 return V; 2015 if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) 2016 if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) 2017 if (NewITy->getBitWidth() > OldITy->getBitWidth()) 2018 return IRB.CreateZExt(V, NewITy); 2019 if (V->getType()->isIntegerTy() && Ty->isPointerTy()) 2020 return IRB.CreateIntToPtr(V, Ty); 2021 if (V->getType()->isPointerTy() && Ty->isIntegerTy()) 2022 return IRB.CreatePtrToInt(V, Ty); 2023 2024 return IRB.CreateBitCast(V, Ty); 2025} 2026 2027/// \brief Test whether the given alloca partition can be promoted to a vector. 2028/// 2029/// This is a quick test to check whether we can rewrite a particular alloca 2030/// partition (and its newly formed alloca) into a vector alloca with only 2031/// whole-vector loads and stores such that it could be promoted to a vector 2032/// SSA value. 
We only can ensure this for a limited set of operations, and we 2033/// don't want to do the rewrites unless we are confident that the result will 2034/// be promotable, so we have an early test here. 2035static bool isVectorPromotionViable(const DataLayout &TD, 2036 Type *AllocaTy, 2037 AllocaPartitioning &P, 2038 uint64_t PartitionBeginOffset, 2039 uint64_t PartitionEndOffset, 2040 AllocaPartitioning::const_use_iterator I, 2041 AllocaPartitioning::const_use_iterator E) { 2042 VectorType *Ty = dyn_cast<VectorType>(AllocaTy); 2043 if (!Ty) 2044 return false; 2045 2046 uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType()); 2047 2048 // While the definition of LLVM vectors is bitpacked, we don't support sizes 2049 // that aren't byte sized. 2050 if (ElementSize % 8) 2051 return false; 2052 assert((TD.getTypeSizeInBits(Ty) % 8) == 0 && 2053 "vector size not a multiple of element size?"); 2054 ElementSize /= 8; 2055 2056 for (; I != E; ++I) { 2057 Use *U = I->getUse(); 2058 if (!U) 2059 continue; // Skip dead use. 2060 2061 uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; 2062 uint64_t BeginIndex = BeginOffset / ElementSize; 2063 if (BeginIndex * ElementSize != BeginOffset || 2064 BeginIndex >= Ty->getNumElements()) 2065 return false; 2066 uint64_t EndOffset = I->EndOffset - PartitionBeginOffset; 2067 uint64_t EndIndex = EndOffset / ElementSize; 2068 if (EndIndex * ElementSize != EndOffset || 2069 EndIndex > Ty->getNumElements()) 2070 return false; 2071 2072 assert(EndIndex > BeginIndex && "Empty vector!"); 2073 uint64_t NumElements = EndIndex - BeginIndex; 2074 Type *PartitionTy 2075 = (NumElements == 1) ? Ty->getElementType() 2076 : VectorType::get(Ty->getElementType(), NumElements); 2077 2078 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { 2079 if (MI->isVolatile()) 2080 return false; 2081 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { 2082 const AllocaPartitioning::MemTransferOffsets &MTO 2083 = P.getMemTransferOffsets(*MTI); 2084 if (!MTO.IsSplittable) 2085 return false; 2086 } 2087 } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { 2088 // Disable vector promotion when there are loads or stores of an FCA. 2089 return false; 2090 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { 2091 if (LI->isVolatile()) 2092 return false; 2093 if (!canConvertValue(TD, PartitionTy, LI->getType())) 2094 return false; 2095 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { 2096 if (SI->isVolatile()) 2097 return false; 2098 if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) 2099 return false; 2100 } else { 2101 return false; 2102 } 2103 } 2104 return true; 2105} 2106 2107/// \brief Test whether the given alloca partition's integer operations can be 2108/// widened to promotable ones. 2109/// 2110/// This is a quick test to check whether we can rewrite the integer loads and 2111/// stores to a particular alloca into wider loads and stores and be able to 2112/// promote the resulting alloca. 2113static bool isIntegerWideningViable(const DataLayout &TD, 2114 Type *AllocaTy, 2115 uint64_t AllocBeginOffset, 2116 AllocaPartitioning &P, 2117 AllocaPartitioning::const_use_iterator I, 2118 AllocaPartitioning::const_use_iterator E) { 2119 uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy); 2120 // Don't create integer types larger than the maximum bitwidth. 2121 if (SizeInBits > IntegerType::MAX_INT_BITS) 2122 return false; 2123 2124 // Don't try to handle allocas with bit-padding. 
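  // For example (illustrative), an alloca of type i17 has a 17-bit type size
  // but a 24-bit store size; widening its accesses would touch padding bits we
  // cannot reason about, so such allocas are rejected here.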
  if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
    return false;

  // We need to ensure that an integer type with the appropriate bitwidth can
  // be converted to the alloca type, whatever that is. We don't want to force
  // the alloca itself to have an integer type if there is a more suitable one.
  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
  if (!canConvertValue(TD, AllocaTy, IntTy) ||
      !canConvertValue(TD, IntTy, AllocaTy))
    return false;

  uint64_t Size = TD.getTypeStoreSize(AllocaTy);

  // Check the uses to ensure the uses are (likely) promotable integer uses.
  // Also ensure that the alloca has a covering load or store. We don't want
  // to widen the integer operations only to fail to promote due to some other
  // unsplittable entry (which we may make splittable later).
  bool WholeAllocaOp = false;
  for (; I != E; ++I) {
    Use *U = I->getUse();
    if (!U)
      continue; // Skip dead use.

    uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
    uint64_t RelEnd = I->EndOffset - AllocBeginOffset;

    // We can't reasonably handle cases where the load or store extends past
    // the end of the alloca's type and into its padding.
    if (RelEnd > Size)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      if (LI->isVolatile())
        return false;
      if (RelBegin == 0 && RelEnd == Size)
        WholeAllocaOp = true;
      if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
          return false;
        continue;
      }
      // Non-integer loads need to be convertible from the alloca type so that
      // they are promotable.
      if (RelBegin != 0 || RelEnd != Size ||
          !canConvertValue(TD, AllocaTy, LI->getType()))
        return false;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      Type *ValueTy = SI->getValueOperand()->getType();
      if (SI->isVolatile())
        return false;
      if (RelBegin == 0 && RelEnd == Size)
        WholeAllocaOp = true;
      if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
          return false;
        continue;
      }
      // Non-integer stores need to be convertible to the alloca type so that
      // they are promotable.
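      // For example (illustrative), a double stored over all eight bytes of an
      // i64 alloca converts cleanly and is fine, while a float stored into
      // only part of that alloca is rejected by the checks below.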
2184 if (RelBegin != 0 || RelEnd != Size || 2185 !canConvertValue(TD, ValueTy, AllocaTy)) 2186 return false; 2187 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { 2188 if (MI->isVolatile() || !isa<Constant>(MI->getLength())) 2189 return false; 2190 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { 2191 const AllocaPartitioning::MemTransferOffsets &MTO 2192 = P.getMemTransferOffsets(*MTI); 2193 if (!MTO.IsSplittable) 2194 return false; 2195 } 2196 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { 2197 if (II->getIntrinsicID() != Intrinsic::lifetime_start && 2198 II->getIntrinsicID() != Intrinsic::lifetime_end) 2199 return false; 2200 } else { 2201 return false; 2202 } 2203 } 2204 return WholeAllocaOp; 2205} 2206 2207static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, 2208 IntegerType *Ty, uint64_t Offset, 2209 const Twine &Name) { 2210 DEBUG(dbgs() << " start: " << *V << "\n"); 2211 IntegerType *IntTy = cast<IntegerType>(V->getType()); 2212 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && 2213 "Element extends past full value"); 2214 uint64_t ShAmt = 8*Offset; 2215 if (DL.isBigEndian()) 2216 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); 2217 if (ShAmt) { 2218 V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); 2219 DEBUG(dbgs() << " shifted: " << *V << "\n"); 2220 } 2221 assert(Ty->getBitWidth() <= IntTy->getBitWidth() && 2222 "Cannot extract to a larger integer!"); 2223 if (Ty != IntTy) { 2224 V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); 2225 DEBUG(dbgs() << " trunced: " << *V << "\n"); 2226 } 2227 return V; 2228} 2229 2230static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, 2231 Value *V, uint64_t Offset, const Twine &Name) { 2232 IntegerType *IntTy = cast<IntegerType>(Old->getType()); 2233 IntegerType *Ty = cast<IntegerType>(V->getType()); 2234 assert(Ty->getBitWidth() <= IntTy->getBitWidth() && 2235 "Cannot insert a larger integer!"); 2236 DEBUG(dbgs() << " start: " << *V << "\n"); 2237 if (Ty != IntTy) { 2238 V = IRB.CreateZExt(V, IntTy, Name + ".ext"); 2239 DEBUG(dbgs() << " extended: " << *V << "\n"); 2240 } 2241 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && 2242 "Element store outside of alloca store"); 2243 uint64_t ShAmt = 8*Offset; 2244 if (DL.isBigEndian()) 2245 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); 2246 if (ShAmt) { 2247 V = IRB.CreateShl(V, ShAmt, Name + ".shift"); 2248 DEBUG(dbgs() << " shifted: " << *V << "\n"); 2249 } 2250 2251 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { 2252 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); 2253 Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); 2254 DEBUG(dbgs() << " masked: " << *Old << "\n"); 2255 V = IRB.CreateOr(Old, V, Name + ".insert"); 2256 DEBUG(dbgs() << " inserted: " << *V << "\n"); 2257 } 2258 return V; 2259} 2260 2261static Value *extractVector(IRBuilderTy &IRB, Value *V, 2262 unsigned BeginIndex, unsigned EndIndex, 2263 const Twine &Name) { 2264 VectorType *VecTy = cast<VectorType>(V->getType()); 2265 unsigned NumElements = EndIndex - BeginIndex; 2266 assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); 2267 2268 if (NumElements == VecTy->getNumElements()) 2269 return V; 2270 2271 if (NumElements == 1) { 2272 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), 2273 Name + ".extract"); 2274 DEBUG(dbgs() << " extract: " << *V << "\n"); 2275 return V; 
2276 } 2277 2278 SmallVector<Constant*, 8> Mask; 2279 Mask.reserve(NumElements); 2280 for (unsigned i = BeginIndex; i != EndIndex; ++i) 2281 Mask.push_back(IRB.getInt32(i)); 2282 V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), 2283 ConstantVector::get(Mask), 2284 Name + ".extract"); 2285 DEBUG(dbgs() << " shuffle: " << *V << "\n"); 2286 return V; 2287} 2288 2289static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, 2290 unsigned BeginIndex, const Twine &Name) { 2291 VectorType *VecTy = cast<VectorType>(Old->getType()); 2292 assert(VecTy && "Can only insert a vector into a vector"); 2293 2294 VectorType *Ty = dyn_cast<VectorType>(V->getType()); 2295 if (!Ty) { 2296 // Single element to insert. 2297 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), 2298 Name + ".insert"); 2299 DEBUG(dbgs() << " insert: " << *V << "\n"); 2300 return V; 2301 } 2302 2303 assert(Ty->getNumElements() <= VecTy->getNumElements() && 2304 "Too many elements!"); 2305 if (Ty->getNumElements() == VecTy->getNumElements()) { 2306 assert(V->getType() == VecTy && "Vector type mismatch"); 2307 return V; 2308 } 2309 unsigned EndIndex = BeginIndex + Ty->getNumElements(); 2310 2311 // When inserting a smaller vector into the larger to store, we first 2312 // use a shuffle vector to widen it with undef elements, and then 2313 // a second shuffle vector to select between the loaded vector and the 2314 // incoming vector. 2315 SmallVector<Constant*, 8> Mask; 2316 Mask.reserve(VecTy->getNumElements()); 2317 for (unsigned i = 0; i != VecTy->getNumElements(); ++i) 2318 if (i >= BeginIndex && i < EndIndex) 2319 Mask.push_back(IRB.getInt32(i - BeginIndex)); 2320 else 2321 Mask.push_back(UndefValue::get(IRB.getInt32Ty())); 2322 V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), 2323 ConstantVector::get(Mask), 2324 Name + ".expand"); 2325 DEBUG(dbgs() << " shuffle: " << *V << "\n"); 2326 2327 Mask.clear(); 2328 for (unsigned i = 0; i != VecTy->getNumElements(); ++i) 2329 Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); 2330 2331 V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); 2332 2333 DEBUG(dbgs() << " blend: " << *V << "\n"); 2334 return V; 2335} 2336 2337namespace { 2338/// \brief Visitor to rewrite instructions using a partition of an alloca to 2339/// use a new alloca. 2340/// 2341/// Also implements the rewriting to vector-based accesses when the partition 2342/// passes the isVectorPromotionViable predicate. Most of the rewriting logic 2343/// lives here. 2344class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, 2345 bool> { 2346 // Befriend the base class so it can delegate to private visit methods. 2347 friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>; 2348 2349 const DataLayout &TD; 2350 AllocaPartitioning &P; 2351 SROA &Pass; 2352 AllocaInst &OldAI, &NewAI; 2353 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; 2354 Type *NewAllocaTy; 2355 2356 // If we are rewriting an alloca partition which can be written as pure 2357 // vector operations, we stash extra information here. When VecTy is 2358 // non-null, we have some strict guarantees about the rewritten alloca: 2359 // - The new alloca is exactly the size of the vector type here. 2360 // - The accesses all either map to the entire vector or to a single 2361 // element. 2362 // - The set of accessing instructions is only one of those handled above 2363 // in isVectorPromotionViable. 
Generally these are the same access kinds 2364 // which are promotable via mem2reg. 2365 VectorType *VecTy; 2366 Type *ElementTy; 2367 uint64_t ElementSize; 2368 2369 // This is a convenience and flag variable that will be null unless the new 2370 // alloca's integer operations should be widened to this integer type due to 2371 // passing isIntegerWideningViable above. If it is non-null, the desired 2372 // integer type will be stored here for easy access during rewriting. 2373 IntegerType *IntTy; 2374 2375 // The offset of the partition user currently being rewritten. 2376 uint64_t BeginOffset, EndOffset; 2377 bool IsSplit; 2378 Use *OldUse; 2379 Instruction *OldPtr; 2380 2381 // Utility IR builder, whose name prefix is setup for each visited use, and 2382 // the insertion point is set to point to the user. 2383 IRBuilderTy IRB; 2384 2385public: 2386 AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, 2387 AllocaPartitioning::iterator PI, 2388 SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, 2389 uint64_t NewBeginOffset, uint64_t NewEndOffset) 2390 : TD(TD), P(P), Pass(Pass), 2391 OldAI(OldAI), NewAI(NewAI), 2392 NewAllocaBeginOffset(NewBeginOffset), 2393 NewAllocaEndOffset(NewEndOffset), 2394 NewAllocaTy(NewAI.getAllocatedType()), 2395 VecTy(), ElementTy(), ElementSize(), IntTy(), 2396 BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), 2397 IRB(NewAI.getContext(), ConstantFolder()) { 2398 } 2399 2400 /// \brief Visit the users of the alloca partition and rewrite them. 2401 bool visitUsers(AllocaPartitioning::const_use_iterator I, 2402 AllocaPartitioning::const_use_iterator E) { 2403 if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P, 2404 NewAllocaBeginOffset, NewAllocaEndOffset, 2405 I, E)) { 2406 ++NumVectorized; 2407 VecTy = cast<VectorType>(NewAI.getAllocatedType()); 2408 ElementTy = VecTy->getElementType(); 2409 assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 && 2410 "Only multiple-of-8 sized vector elements are viable"); 2411 ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8; 2412 } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(), 2413 NewAllocaBeginOffset, P, I, E)) { 2414 IntTy = Type::getIntNTy(NewAI.getContext(), 2415 TD.getTypeSizeInBits(NewAI.getAllocatedType())); 2416 } 2417 bool CanSROA = true; 2418 for (; I != E; ++I) { 2419 if (!I->getUse()) 2420 continue; // Skip dead uses. 2421 BeginOffset = I->BeginOffset; 2422 EndOffset = I->EndOffset; 2423 IsSplit = I->isSplit(); 2424 OldUse = I->getUse(); 2425 OldPtr = cast<Instruction>(OldUse->get()); 2426 2427 Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); 2428 IRB.SetInsertPoint(OldUserI); 2429 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); 2430 IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + 2431 "."); 2432 2433 CanSROA &= visit(cast<Instruction>(OldUse->getUser())); 2434 } 2435 if (VecTy) { 2436 assert(CanSROA); 2437 VecTy = 0; 2438 ElementTy = 0; 2439 ElementSize = 0; 2440 } 2441 if (IntTy) { 2442 assert(CanSROA); 2443 IntTy = 0; 2444 } 2445 return CanSROA; 2446 } 2447 2448private: 2449 // Every instruction which can end up as a user must have a rewrite rule. 2450 bool visitInstruction(Instruction &I) { 2451 DEBUG(dbgs() << " !!!! 
Cannot rewrite: " << I << "\n"); 2452 llvm_unreachable("No rewrite rule for this instruction!"); 2453 } 2454 2455 Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { 2456 assert(BeginOffset >= NewAllocaBeginOffset); 2457 APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); 2458 return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); 2459 } 2460 2461 /// \brief Compute suitable alignment to access an offset into the new alloca. 2462 unsigned getOffsetAlign(uint64_t Offset) { 2463 unsigned NewAIAlign = NewAI.getAlignment(); 2464 if (!NewAIAlign) 2465 NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType()); 2466 return MinAlign(NewAIAlign, Offset); 2467 } 2468 2469 /// \brief Compute suitable alignment to access this partition of the new 2470 /// alloca. 2471 unsigned getPartitionAlign() { 2472 return getOffsetAlign(BeginOffset - NewAllocaBeginOffset); 2473 } 2474 2475 /// \brief Compute suitable alignment to access a type at an offset of the 2476 /// new alloca. 2477 /// 2478 /// \returns zero if the type's ABI alignment is a suitable alignment, 2479 /// otherwise returns the maximal suitable alignment. 2480 unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) { 2481 unsigned Align = getOffsetAlign(Offset); 2482 return Align == TD.getABITypeAlignment(Ty) ? 0 : Align; 2483 } 2484 2485 /// \brief Compute suitable alignment to access a type at the beginning of 2486 /// this partition of the new alloca. 2487 /// 2488 /// See \c getOffsetTypeAlign for details; this routine delegates to it. 2489 unsigned getPartitionTypeAlign(Type *Ty) { 2490 return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset); 2491 } 2492 2493 unsigned getIndex(uint64_t Offset) { 2494 assert(VecTy && "Can only call getIndex when rewriting a vector"); 2495 uint64_t RelOffset = Offset - NewAllocaBeginOffset; 2496 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); 2497 uint32_t Index = RelOffset / ElementSize; 2498 assert(Index * ElementSize == RelOffset); 2499 return Index; 2500 } 2501 2502 void deleteIfTriviallyDead(Value *V) { 2503 Instruction *I = cast<Instruction>(V); 2504 if (isInstructionTriviallyDead(I)) 2505 Pass.DeadInsts.insert(I); 2506 } 2507 2508 Value *rewriteVectorizedLoadInst() { 2509 unsigned BeginIndex = getIndex(BeginOffset); 2510 unsigned EndIndex = getIndex(EndOffset); 2511 assert(EndIndex > BeginIndex && "Empty vector!"); 2512 2513 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2514 "load"); 2515 return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); 2516 } 2517 2518 Value *rewriteIntegerLoad(LoadInst &LI) { 2519 assert(IntTy && "We cannot insert an integer to the alloca"); 2520 assert(!LI.isVolatile()); 2521 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2522 "load"); 2523 V = convertValue(TD, IRB, V, IntTy); 2524 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 2525 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 2526 if (Offset > 0 || EndOffset < NewAllocaEndOffset) 2527 V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, 2528 "extract"); 2529 return V; 2530 } 2531 2532 bool visitLoadInst(LoadInst &LI) { 2533 DEBUG(dbgs() << " original: " << LI << "\n"); 2534 Value *OldOp = LI.getOperand(0); 2535 assert(OldOp == OldPtr); 2536 2537 uint64_t Size = EndOffset - BeginOffset; 2538 2539 Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), Size * 8) 2540 : LI.getType(); 2541 bool IsPtrAdjusted = false; 2542 Value *V; 2543 if (VecTy) { 2544 V = rewriteVectorizedLoadInst(); 2545 } else if (IntTy && LI.getType()->isIntegerTy()) { 2546 V = rewriteIntegerLoad(LI); 2547 } else if (BeginOffset == NewAllocaBeginOffset && 2548 canConvertValue(TD, NewAllocaTy, LI.getType())) { 2549 V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2550 LI.isVolatile(), "load"); 2551 } else { 2552 Type *LTy = TargetTy->getPointerTo(); 2553 V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), 2554 getPartitionTypeAlign(TargetTy), 2555 LI.isVolatile(), "load"); 2556 IsPtrAdjusted = true; 2557 } 2558 V = convertValue(TD, IRB, V, TargetTy); 2559 2560 if (IsSplit) { 2561 assert(!LI.isVolatile()); 2562 assert(LI.getType()->isIntegerTy() && 2563 "Only integer type loads and stores are split"); 2564 assert(Size < TD.getTypeStoreSize(LI.getType()) && 2565 "Split load isn't smaller than original load"); 2566 assert(LI.getType()->getIntegerBitWidth() == 2567 TD.getTypeStoreSizeInBits(LI.getType()) && 2568 "Non-byte-multiple bit width"); 2569 // Move the insertion point just past the load so that we can refer to it. 2570 IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); 2571 // Create a placeholder value with the same type as LI to use as the 2572 // basis for the new value. This allows us to replace the uses of LI with 2573 // the computed value, and then replace the placeholder with LI, leaving 2574 // LI only used for this computation. 2575 Value *Placeholder 2576 = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); 2577 V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, 2578 "insert"); 2579 LI.replaceAllUsesWith(V); 2580 Placeholder->replaceAllUsesWith(&LI); 2581 delete Placeholder; 2582 } else { 2583 LI.replaceAllUsesWith(V); 2584 } 2585 2586 Pass.DeadInsts.insert(&LI); 2587 deleteIfTriviallyDead(OldOp); 2588 DEBUG(dbgs() << " to: " << *V << "\n"); 2589 return !LI.isVolatile() && !IsPtrAdjusted; 2590 } 2591 2592 bool rewriteVectorizedStoreInst(Value *V, 2593 StoreInst &SI, Value *OldOp) { 2594 unsigned BeginIndex = getIndex(BeginOffset); 2595 unsigned EndIndex = getIndex(EndOffset); 2596 assert(EndIndex > BeginIndex && "Empty vector!"); 2597 unsigned NumElements = EndIndex - BeginIndex; 2598 assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); 2599 Type *PartitionTy 2600 = (NumElements == 1) ? ElementTy 2601 : VectorType::get(ElementTy, NumElements); 2602 if (V->getType() != PartitionTy) 2603 V = convertValue(TD, IRB, V, PartitionTy); 2604 2605 // Mix in the existing elements. 
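    // E.g. storing a <2 x i32> slice into an <8 x i32> alloca requires reading
    // the current vector, blending the two new lanes into it, and storing the
    // whole vector back so that the untouched lanes are preserved.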
2606 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2607 "load"); 2608 V = insertVector(IRB, Old, V, BeginIndex, "vec"); 2609 2610 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 2611 Pass.DeadInsts.insert(&SI); 2612 2613 (void)Store; 2614 DEBUG(dbgs() << " to: " << *Store << "\n"); 2615 return true; 2616 } 2617 2618 bool rewriteIntegerStore(Value *V, StoreInst &SI) { 2619 assert(IntTy && "We cannot extract an integer from the alloca"); 2620 assert(!SI.isVolatile()); 2621 if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { 2622 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2623 "oldload"); 2624 Old = convertValue(TD, IRB, Old, IntTy); 2625 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 2626 uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 2627 V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, 2628 "insert"); 2629 } 2630 V = convertValue(TD, IRB, V, NewAllocaTy); 2631 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 2632 Pass.DeadInsts.insert(&SI); 2633 (void)Store; 2634 DEBUG(dbgs() << " to: " << *Store << "\n"); 2635 return true; 2636 } 2637 2638 bool visitStoreInst(StoreInst &SI) { 2639 DEBUG(dbgs() << " original: " << SI << "\n"); 2640 Value *OldOp = SI.getOperand(1); 2641 assert(OldOp == OldPtr); 2642 2643 Value *V = SI.getValueOperand(); 2644 2645 // Strip all inbounds GEPs and pointer casts to try to dig out any root 2646 // alloca that should be re-examined after promoting this alloca. 2647 if (V->getType()->isPointerTy()) 2648 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) 2649 Pass.PostPromotionWorklist.insert(AI); 2650 2651 uint64_t Size = EndOffset - BeginOffset; 2652 if (Size < TD.getTypeStoreSize(V->getType())) { 2653 assert(!SI.isVolatile()); 2654 assert(IsSplit && "A seemingly split store isn't splittable"); 2655 assert(V->getType()->isIntegerTy() && 2656 "Only integer type loads and stores are split"); 2657 assert(V->getType()->getIntegerBitWidth() == 2658 TD.getTypeStoreSizeInBits(V->getType()) && 2659 "Non-byte-multiple bit width"); 2660 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); 2661 V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, 2662 "extract"); 2663 } 2664 2665 if (VecTy) 2666 return rewriteVectorizedStoreInst(V, SI, OldOp); 2667 if (IntTy && V->getType()->isIntegerTy()) 2668 return rewriteIntegerStore(V, SI); 2669 2670 StoreInst *NewSI; 2671 if (BeginOffset == NewAllocaBeginOffset && 2672 EndOffset == NewAllocaEndOffset && 2673 canConvertValue(TD, V->getType(), NewAllocaTy)) { 2674 V = convertValue(TD, IRB, V, NewAllocaTy); 2675 NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), 2676 SI.isVolatile()); 2677 } else { 2678 Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); 2679 NewSI = IRB.CreateAlignedStore(V, NewPtr, 2680 getPartitionTypeAlign(V->getType()), 2681 SI.isVolatile()); 2682 } 2683 (void)NewSI; 2684 Pass.DeadInsts.insert(&SI); 2685 deleteIfTriviallyDead(OldOp); 2686 2687 DEBUG(dbgs() << " to: " << *NewSI << "\n"); 2688 return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); 2689 } 2690 2691 /// \brief Compute an integer value from splatting an i8 across the given 2692 /// number of bytes. 2693 /// 2694 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't 2695 /// call this routine. 2696 /// FIXME: Heed the advice above. 2697 /// 2698 /// \param V The i8 value to splat. 
2699 /// \param Size The number of bytes in the output (assuming i8 is one byte) 2700 Value *getIntegerSplat(Value *V, unsigned Size) { 2701 assert(Size > 0 && "Expected a positive number of bytes."); 2702 IntegerType *VTy = cast<IntegerType>(V->getType()); 2703 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); 2704 if (Size == 1) 2705 return V; 2706 2707 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); 2708 V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), 2709 ConstantExpr::getUDiv( 2710 Constant::getAllOnesValue(SplatIntTy), 2711 ConstantExpr::getZExt( 2712 Constant::getAllOnesValue(V->getType()), 2713 SplatIntTy)), 2714 "isplat"); 2715 return V; 2716 } 2717 2718 /// \brief Compute a vector splat for a given element value. 2719 Value *getVectorSplat(Value *V, unsigned NumElements) { 2720 V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); 2721 DEBUG(dbgs() << " splat: " << *V << "\n"); 2722 return V; 2723 } 2724 2725 bool visitMemSetInst(MemSetInst &II) { 2726 DEBUG(dbgs() << " original: " << II << "\n"); 2727 assert(II.getRawDest() == OldPtr); 2728 2729 // If the memset has a variable size, it cannot be split, just adjust the 2730 // pointer to the new alloca. 2731 if (!isa<Constant>(II.getLength())) { 2732 II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); 2733 Type *CstTy = II.getAlignmentCst()->getType(); 2734 II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign())); 2735 2736 deleteIfTriviallyDead(OldPtr); 2737 return false; 2738 } 2739 2740 // Record this instruction for deletion. 2741 Pass.DeadInsts.insert(&II); 2742 2743 Type *AllocaTy = NewAI.getAllocatedType(); 2744 Type *ScalarTy = AllocaTy->getScalarType(); 2745 2746 // If this doesn't map cleanly onto the alloca type, and that type isn't 2747 // a single value type, just emit a memset. 2748 if (!VecTy && !IntTy && 2749 (BeginOffset != NewAllocaBeginOffset || 2750 EndOffset != NewAllocaEndOffset || 2751 !AllocaTy->isSingleValueType() || 2752 !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || 2753 TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { 2754 Type *SizeTy = II.getLength()->getType(); 2755 Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); 2756 CallInst *New 2757 = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, 2758 II.getRawDest()->getType()), 2759 II.getValue(), Size, getPartitionAlign(), 2760 II.isVolatile()); 2761 (void)New; 2762 DEBUG(dbgs() << " to: " << *New << "\n"); 2763 return false; 2764 } 2765 2766 // If we can represent this as a simple value, we have to build the actual 2767 // value to store, which requires expanding the byte present in memset to 2768 // a sensible representation for the alloca type. This is essentially 2769 // splatting the byte to a sufficiently wide integer, splatting it across 2770 // any desired vector width, and bitcasting to the final type. 2771 Value *V; 2772 2773 if (VecTy) { 2774 // If this is a memset of a vectorized alloca, insert it. 
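      // For example (illustrative), a memset covering the upper eight bytes of
      // a <4 x i32> alloca becomes a splat of the byte into a <2 x i32> value
      // that insertVector then blends over elements 2 and 3 of the loaded
      // vector below.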
      assert(ElementTy == ScalarTy);

      unsigned BeginIndex = getIndex(BeginOffset);
      unsigned EndIndex = getIndex(EndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

      Value *Splat =
        getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
      Splat = convertValue(TD, IRB, Splat, ElementTy);
      if (NumElements > 1)
        Splat = getVectorSplat(Splat, NumElements);

      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                         "oldload");
      V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
    } else if (IntTy) {
      // If this is a memset on an alloca where we can widen stores, insert the
      // set integer.
      assert(!II.isVolatile());

      uint64_t Size = EndOffset - BeginOffset;
      V = getIntegerSplat(II.getValue(), Size);

      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaEndOffset)) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                           "oldload");
        Old = convertValue(TD, IRB, Old, IntTy);
        assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
        uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
        V = insertInteger(TD, IRB, Old, V, Offset, "insert");
      } else {
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");
      }
      V = convertValue(TD, IRB, V, AllocaTy);
    } else {
      // Established these invariants above.
      assert(BeginOffset == NewAllocaBeginOffset);
      assert(EndOffset == NewAllocaEndOffset);

      V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
      if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
        V = getVectorSplat(V, AllocaVecTy->getNumElements());

      V = convertValue(TD, IRB, V, AllocaTy);
    }

    Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                        II.isVolatile());
    (void)New;
    DEBUG(dbgs() << " to: " << *New << "\n");
    return !II.isVolatile();
  }

  bool visitMemTransferInst(MemTransferInst &II) {
    // Rewriting of memory transfer instructions can be a bit tricky. We break
    // them into two categories: split intrinsics and unsplit intrinsics.

    DEBUG(dbgs() << " original: " << II << "\n");

    assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
    bool IsDest = II.getRawDest() == OldPtr;

    const AllocaPartitioning::MemTransferOffsets &MTO
      = P.getMemTransferOffsets(II);

    // Compute the relative offset within the transfer.
    unsigned IntPtrWidth = TD.getPointerSizeInBits();
    APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
                                                       : MTO.SourceBegin));

    unsigned Align = II.getAlignment();
    if (Align > 1)
      Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
                       MinAlign(II.getAlignment(), getPartitionAlign()));

    // For unsplit intrinsics, we simply modify the source and destination
    // pointers in place. This isn't just an optimization, it is a matter of
    // correctness. With unsplit intrinsics we may be dealing with transfers
    // within a single alloca before SROA ran, or with transfers that have
    // a variable length. We may also be dealing with memmove instead of
    // memcpy, and so simply updating the pointers is necessary for us to
    // update both source and dest of a single call.
    if (!MTO.IsSplittable) {
      Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
      if (IsDest)
        II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
      else
        II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));

      Type *CstTy = II.getAlignmentCst()->getType();
      II.setAlignment(ConstantInt::get(CstTy, Align));

      DEBUG(dbgs() << " to: " << II << "\n");
      deleteIfTriviallyDead(OldOp);
      return false;
    }
    // For split transfer intrinsics we have an incredibly useful assurance:
    // the source and destination do not reside within the same alloca, and at
    // least one of them does not escape. This means that we can replace
    // memmove with memcpy, and we don't need to worry about all manner of
    // downsides to splitting and transforming the operations.

    // If this doesn't map cleanly onto the alloca type, and that type isn't
    // a single value type, just emit a memcpy.
    bool EmitMemCpy
      = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
                             EndOffset != NewAllocaEndOffset ||
                             !NewAI.getAllocatedType()->isSingleValueType());

    // If we're just going to emit a memcpy, the alloca hasn't changed, and the
    // size hasn't been shrunk based on analysis of the viable range, this is
    // a no-op.
    if (EmitMemCpy && &OldAI == &NewAI) {
      uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin;
      uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd;
      // Ensure the start lines up.
      assert(BeginOffset == OrigBegin);
      (void)OrigBegin;

      // Rewrite the size as needed.
      if (EndOffset != OrigEnd)
        II.setLength(ConstantInt::get(II.getLength()->getType(),
                                      EndOffset - BeginOffset));
      return false;
    }
    // Record this instruction for deletion.
    Pass.DeadInsts.insert(&II);

    // Strip all inbounds GEPs and pointer casts to try to dig out any root
    // alloca that should be re-examined after rewriting this instruction.
    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI
          = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
      Pass.Worklist.insert(AI);

    if (EmitMemCpy) {
      Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
                                : II.getRawDest()->getType();

      // Compute the other pointer, folding as much as possible to produce
      // a single, simple GEP in most cases.
      OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);

      Value *OurPtr
        = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
                                           : II.getRawSource()->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);

      CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
                                       IsDest ? OtherPtr : OurPtr,
                                       Size, Align, II.isVolatile());
      (void)New;
      DEBUG(dbgs() << " to: " << *New << "\n");
      return false;
    }

    // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy
    // is equivalent to 1, but that isn't true if we end up rewriting this as
    // a load or store.
    if (!Align)
      Align = 1;

    bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
                         EndOffset == NewAllocaEndOffset;
    uint64_t Size = EndOffset - BeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy
      = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;

    Type *OtherPtrTy = NewAI.getType();
    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherPtrTy = VecTy->getElementType();
      else
        OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);

      OtherPtrTy = OtherPtrTy->getPointerTo();
    } else if (IntTy && !IsWholeAlloca) {
      OtherPtrTy = SubIntTy->getPointerTo();
    }

    Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
    Value *DstPtr = &NewAI;
    if (!IsDest)
      std::swap(SrcPtr, DstPtr);

    Value *Src;
    if (VecTy && !IsWholeAlloca && !IsDest) {
      Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                  "load");
      Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                  "load");
      Src = convertValue(TD, IRB, Src, IntTy);
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
      Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
    } else {
      Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
                                  "copyload");
    }

    if (VecTy && !IsWholeAlloca && IsDest) {
      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                         "oldload");
      Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                         "oldload");
      Old = convertValue(TD, IRB, Old, IntTy);
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
      Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
      Src = convertValue(TD, IRB, Src, NewAllocaTy);
    }

    StoreInst *Store = cast<StoreInst>(
      IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
    (void)Store;
    DEBUG(dbgs() << " to: " << *Store << "\n");
    return !II.isVolatile();
  }

  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
           II.getIntrinsicID() == Intrinsic::lifetime_end);
    DEBUG(dbgs() << " original: " << II << "\n");
    assert(II.getArgOperand(1) == OldPtr);

    // Record this instruction for deletion.
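    // A replacement lifetime marker sized to this partition is emitted below
    // against the adjusted pointer into the new alloca.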
    Pass.DeadInsts.insert(&II);

    ConstantInt *Size
      = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                         EndOffset - BeginOffset);
    Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType());
    Value *New;
    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr, Size);
    else
      New = IRB.CreateLifetimeEnd(Ptr, Size);

    (void)New;
    DEBUG(dbgs() << " to: " << *New << "\n");
    return true;
  }

  bool visitPHINode(PHINode &PN) {
    DEBUG(dbgs() << " original: " << PN << "\n");

    // We would like to compute a new pointer in only one place, but have it be
    // as local as possible to the PHI. To do that, we re-use the location of
    // the old pointer, which necessarily must be in the right position to
    // dominate the PHI.
    IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
    PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
                             ".");

    Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
    // Replace the operands which were using the old pointer.
    std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);

    DEBUG(dbgs() << " to: " << PN << "\n");
    deleteIfTriviallyDead(OldPtr);
    return false;
  }

  bool visitSelectInst(SelectInst &SI) {
    DEBUG(dbgs() << " original: " << SI << "\n");
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");

    Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
    // Replace the operands which were using the old pointer.
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    DEBUG(dbgs() << " to: " << SI << "\n");
    deleteIfTriviallyDead(OldPtr);
    return false;
  }

};
}

namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
/// a particular pointer (or any pointer derived from it which we can identify)
/// with scalar loads and stores.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;

  const DataLayout &TD;

  /// Queue of pointer uses to analyze and potentially rewrite.
  SmallVector<Use *, 8> Queue;

  /// Set to prevent us from cycling with phi nodes and loops.
  SmallPtrSet<User *, 8> Visited;

  /// The current pointer use being rewritten. This is used to dig up the used
  /// value (as opposed to the user).
  Use *U;

public:
  AggLoadStoreRewriter(const DataLayout &TD) : TD(TD) {}

  /// Rewrite loads and stores through a pointer and all pointers derived from
  /// it.
  bool rewrite(Instruction &I) {
    DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
    enqueueUsers(I);
    bool Changed = false;
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));
    }
    return Changed;
  }

private:
  /// Enqueue all the users of the given instruction for further processing.
  /// This uses a set to de-duplicate users.
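  /// Uses, rather than users, are what get queued so that each visit callback
  /// can tell via the U member which operand of the user is the traced
  /// pointer.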
  void enqueueUsers(Instruction &I) {
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
         ++UI)
      if (Visited.insert(*UI))
        Queue.push_back(&UI.getUse());
  }

  // Conservative default is to not rewrite anything.
  bool visitInstruction(Instruction &I) { return false; }

  /// \brief Generic recursive split emission class.
  template <typename Derived>
  class OpSplitter {
  protected:
    /// The builder used to form new instructions.
    IRBuilderTy IRB;
    /// The indices to be used with insertvalue or extractvalue to select the
    /// appropriate value within the aggregate.
    SmallVector<unsigned, 4> Indices;
    /// The indices to a GEP instruction which will move Ptr to the correct
    /// slot within the aggregate.
    SmallVector<Value *, 4> GEPIndices;
    /// The base pointer of the original op, used as a base for GEPing the
    /// split operations.
    Value *Ptr;

    /// Initialize the splitter with an insertion point, Ptr and start with a
    /// single zero GEP index.
    OpSplitter(Instruction *InsertionPoint, Value *Ptr)
      : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}

  public:
    /// \brief Generic recursive split emission routine.
    ///
    /// This method recursively splits an aggregate op (load or store) into
    /// scalar or vector ops. It splits recursively until it hits a single
    /// value and emits that single value operation via the template argument.
    ///
    /// The logic of this routine relies on GEPs and insertvalue and
    /// extractvalue all operating with the same fundamental index list, merely
    /// formatted differently (GEPs need actual values).
    ///
    /// \param Ty The type being split recursively into smaller ops.
    /// \param Agg The aggregate value being built up or stored, depending on
    /// whether this is splitting a load or a store respectively.
    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
      if (Ty->isSingleValueType())
        return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      llvm_unreachable("Only arrays and structs are aggregate loadable types");
    }
  };

  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
      : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}

    /// Emit a leaf load of a single value.
    /// This is called at the leaves of the recursive emission to actually
    /// load values.
    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
      assert(Ty->isSingleValueType());
      // Load the single value and insert it using the indices.
      Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
      Value *Load = IRB.CreateLoad(GEP, Name + ".load");
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
      DEBUG(dbgs() << " to: " << *Load << "\n");
    }
  };

  bool visitLoadInst(LoadInst &LI) {
    assert(LI.getPointerOperand() == *U);
    if (!LI.isSimple() || LI.getType()->isSingleValueType())
      return false;

    // We have an aggregate being loaded, split it apart.
    DEBUG(dbgs() << " original: " << LI << "\n");
    LoadOpSplitter Splitter(&LI, *U);
    Value *V = UndefValue::get(LI.getType());
    Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
    LI.replaceAllUsesWith(V);
    LI.eraseFromParent();
    return true;
  }

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
      : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}

    /// Emit a leaf store of a single value. This is called at the leaves of
    /// the recursive emission to actually produce stores.
    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
      assert(Ty->isSingleValueType());
      // Extract the single value and store it using the indices.
      Value *Store = IRB.CreateStore(
        IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
        IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
      (void)Store;
      DEBUG(dbgs() << " to: " << *Store << "\n");
    }
  };

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
      return false;
    Value *V = SI.getValueOperand();
    if (V->getType()->isSingleValueType())
      return false;

    // We have an aggregate being stored, split it apart.
    DEBUG(dbgs() << " original: " << SI << "\n");
    StoreOpSplitter Splitter(&SI, *U);
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
    SI.eraseFromParent();
    return true;
  }

  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC);
    return false;
  }

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    enqueueUsers(GEPI);
    return false;
  }

  bool visitPHINode(PHINode &PN) {
    enqueueUsers(PN);
    return false;
  }

  bool visitSelectInst(SelectInst &SI) {
    enqueueUsers(SI);
    return false;
  }
};
}

/// \brief Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers of types as it can without changing either the type
/// size or the allocated size.
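///
/// For example, { { float } } and [1 x float] both strip down to float, while
/// { float, i32 } is returned unchanged because dropping the struct would
/// change the allocated size.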
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
  if (Ty->isSingleValueType())
    return Ty;

  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);

  Type *InnerTy;
  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    unsigned Index = SL->getElementContainingOffset(0);
    InnerTy = STy->getElementType(Index);
  } else {
    return Ty;
  }

  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
      TypeSize > DL.getTypeSizeInBits(InnerTy))
    return Ty;

  return stripAggregateTypeWrapping(DL, InnerTy);
}

/// \brief Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
/// This recurses through the aggregate type and tries to compute a subtype
/// based on the offset and size. When the offset and size span a sub-section
/// of an array, it will even compute a new array type for that sub-section,
/// and the same for structs.
///
/// Note that this routine is very strict and tries to find a partition of the
/// type which produces the *exact* right offset and size. It is not forgiving
/// when the size or offset causes either end of the type-based partition to
/// be off. Also, this is a best-effort routine. It is reasonable to give up
/// and not return a type if necessary.
static Type *getTypePartition(const DataLayout &TD, Type *Ty,
                              uint64_t Offset, uint64_t Size) {
  if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
    return stripAggregateTypeWrapping(TD, Ty);
  if (Offset > TD.getTypeAllocSize(Ty) ||
      (TD.getTypeAllocSize(Ty) - Offset) < Size)
    return 0;

  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
    // We can't partition pointers...
    if (SeqTy->isPointerTy())
      return 0;

    Type *ElementTy = SeqTy->getElementType();
    uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
    uint64_t NumSkippedElements = Offset / ElementSize;
    if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
      if (NumSkippedElements >= ArrTy->getNumElements())
        return 0;
    } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
      if (NumSkippedElements >= VecTy->getNumElements())
        return 0;
    }
    Offset -= NumSkippedElements * ElementSize;

    // First check if we need to recurse.
    if (Offset > 0 || Size < ElementSize) {
      // Bail if the partition ends in a different array element.
      if ((Offset + Size) > ElementSize)
        return 0;
      // Recurse through the element type trying to peel off offset bytes.
      return getTypePartition(TD, ElementTy, Offset, Size);
    }
    assert(Offset == 0);

    if (Size == ElementSize)
      return stripAggregateTypeWrapping(TD, ElementTy);
    assert(Size > ElementSize);
    uint64_t NumElements = Size / ElementSize;
    if (NumElements * ElementSize != Size)
      return 0;
    return ArrayType::get(ElementTy, NumElements);
  }

  StructType *STy = dyn_cast<StructType>(Ty);
  if (!STy)
    return 0;

  const StructLayout *SL = TD.getStructLayout(STy);
  if (Offset >= SL->getSizeInBytes())
    return 0;
  uint64_t EndOffset = Offset + Size;
  if (EndOffset > SL->getSizeInBytes())
    return 0;

  unsigned Index = SL->getElementContainingOffset(Offset);
  Offset -= SL->getElementOffset(Index);

  Type *ElementTy = STy->getElementType(Index);
  uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
  if (Offset >= ElementSize)
    return 0; // The offset points into alignment padding.

  // See if any partition must be contained by the element.
  if (Offset > 0 || Size < ElementSize) {
    if ((Offset + Size) > ElementSize)
      return 0;
    return getTypePartition(TD, ElementTy, Offset, Size);
  }
  assert(Offset == 0);

  if (Size == ElementSize)
    return stripAggregateTypeWrapping(TD, ElementTy);

  StructType::element_iterator EI = STy->element_begin() + Index,
                               EE = STy->element_end();
  if (EndOffset < SL->getSizeInBytes()) {
    unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
    if (Index == EndIndex)
      return 0; // Within a single element and its padding.

    // Don't try to form "natural" types if the elements don't line up with the
    // expected size.
    // FIXME: We could potentially recurse down through the last element in the
    // sub-struct to find a natural end point.
    if (SL->getElementOffset(EndIndex) != EndOffset)
      return 0;

    assert(Index < EndIndex);
    EE = STy->element_begin() + EndIndex;
  }

  // Try to build up a sub-structure.
  StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
                                      STy->isPacked());
  const StructLayout *SubSL = TD.getStructLayout(SubTy);
  if (Size != SubSL->getSizeInBytes())
    return 0; // The sub-struct doesn't have quite the size needed.

  return SubTy;
}

/// \brief Rewrite an alloca partition's users.
///
/// This routine drives both of the rewriting goals of the SROA pass. It tries
/// to rewrite uses of an alloca partition to be conducive to SSA value
/// promotion. If the partition needs a new, more refined alloca, this will
/// build that new alloca, preserving as much type information as possible, and
/// rewrite the uses of the old alloca to point at the new one and have the
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and, if it was, queues the alloca to be promoted.
bool SROA::rewriteAllocaPartition(AllocaInst &AI,
                                  AllocaPartitioning &P,
                                  AllocaPartitioning::iterator PI) {
  uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset;
  bool IsLive = false;
  for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
                                        UE = P.use_end(PI);
       UI != UE && !IsLive; ++UI)
    if (UI->getUse())
      IsLive = true;
  if (!IsLive)
    return false; // No live uses left of this partition.
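
  // Speculate loads around any PHIs and selects that use this partition's
  // pointer so those uses can be rewritten against the new alloca below.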
  DEBUG(dbgs() << "Speculating PHIs and selects in partition "
               << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n");

  PHIOrSelectSpeculator Speculator(*TD, P, *this);
  DEBUG(dbgs() << " speculating ");
  DEBUG(P.print(dbgs(), PI, ""));
  Speculator.visitUsers(PI);

  // Try to compute a friendly type for this partition of the alloca. This
  // won't always succeed, in which case we fall back to a legal integer type
  // or an i8 array of an appropriate size.
  Type *AllocaTy = 0;
  if (Type *PartitionTy = P.getCommonType(PI))
    if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize)
      AllocaTy = PartitionTy;
  if (!AllocaTy)
    if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(),
                                             PI->BeginOffset, AllocaSize))
      AllocaTy = PartitionTy;
  if ((!AllocaTy ||
       (AllocaTy->isArrayTy() &&
        AllocaTy->getArrayElementType()->isIntegerTy())) &&
      TD->isLegalInteger(AllocaSize * 8))
    AllocaTy = Type::getIntNTy(*C, AllocaSize * 8);
  if (!AllocaTy)
    AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize);
  assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize);

  // Check for the case where we're going to rewrite to a new alloca of the
  // exact same type as the original, and with the same access offsets. In that
  // case, re-use the existing alloca, but still run through the rewriter to
  // perform phi and select speculation.
  AllocaInst *NewAI;
  if (AllocaTy == AI.getAllocatedType()) {
    assert(PI->BeginOffset == 0 &&
           "Non-zero begin offset but same alloca type");
    assert(PI == P.begin() && "Begin offset is zero on later partition");
    NewAI = &AI;
  } else {
    unsigned Alignment = AI.getAlignment();
    if (!Alignment) {
      // The minimum alignment which users can rely on when the explicit
      // alignment is omitted or zero is that required by the ABI for this
      // type.
      Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
    }
    Alignment = MinAlign(Alignment, PI->BeginOffset);
    // If we will get at least this much alignment from the type alone, leave
    // the alloca's alignment unconstrained.
    if (Alignment <= TD->getABITypeAlignment(AllocaTy))
      Alignment = 0;
    NewAI = new AllocaInst(AllocaTy, 0, Alignment,
                           AI.getName() + ".sroa." + Twine(PI - P.begin()),
                           &AI);
    ++NumNewAllocas;
  }

  DEBUG(dbgs() << "Rewriting alloca partition "
               << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: "
               << *NewAI << "\n");

  // Track the high watermark of the post-promotion worklist. We will reset it
  // to this point if the alloca is not in fact scheduled for promotion.
  unsigned PPWOldSize = PostPromotionWorklist.size();

  AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI,
                                   PI->BeginOffset, PI->EndOffset);
  DEBUG(dbgs() << " rewriting ");
  DEBUG(P.print(dbgs(), PI, ""));
  bool Promotable = Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI));
  if (Promotable) {
    DEBUG(dbgs() << " and queuing for promotion\n");
    PromotableAllocas.push_back(NewAI);
  } else if (NewAI != &AI) {
    // If we can't promote the alloca, iterate on it to check for new
    // refinements exposed by splitting the current alloca. Don't iterate on an
    // alloca which didn't actually change and didn't get promoted.
    Worklist.insert(NewAI);
  }

  // Drop any post-promotion work items if promotion didn't happen.
  if (!Promotable)
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

  return true;
}

/// \brief Walks the partitioning of an alloca rewriting uses of each partition.
bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) {
  bool Changed = false;
  for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE;
       ++PI)
    Changed |= rewriteAllocaPartition(AI, P, PI);

  return Changed;
}

/// \brief Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
/// a partitioning of the alloca, and then hands it off to be split and
/// rewritten as needed.
bool SROA::runOnAlloca(AllocaInst &AI) {
  DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
  ++NumAllocasAnalyzed;

  // Special case dead allocas, as they're trivial.
  if (AI.use_empty()) {
    AI.eraseFromParent();
    return true;
  }

  // Skip alloca forms that this analysis can't handle.
  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
      TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
    return false;

  bool Changed = false;

  // First, split any FCA loads and stores touching this alloca to expose
  // better splitting and promotion opportunities.
  AggLoadStoreRewriter AggRewriter(*TD);
  Changed |= AggRewriter.rewrite(AI);

  // Build the partition set using a recursive instruction-visiting builder.
  AllocaPartitioning P(*TD, AI);
  DEBUG(P.print(dbgs()));
  if (P.isEscaped())
    return Changed;

  // Delete all the dead users of this alloca before splitting and rewriting it.
  for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(),
                                              DE = P.dead_user_end();
       DI != DE; ++DI) {
    Changed = true;
    (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
    DeadInsts.insert(*DI);
  }
  for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
                                            DE = P.dead_op_end();
       DO != DE; ++DO) {
    Value *OldV = **DO;
    // Clobber the use with an undef value.
    **DO = UndefValue::get(OldV->getType());
    if (Instruction *OldI = dyn_cast<Instruction>(OldV))
      if (isInstructionTriviallyDead(OldI)) {
        Changed = true;
        DeadInsts.insert(OldI);
      }
  }

  // No partitions to split. Leave the dead alloca for a later pass to clean up.
  if (P.begin() == P.end())
    return Changed;

  return splitAlloca(AI, P) || Changed;
}

/// \brief Delete the dead instructions accumulated in this run.
///
/// Recursively deletes the dead instructions we've accumulated. This is done
/// at the very end to maximize locality of the recursive delete and to
/// minimize the problems of invalidated instruction pointers as such pointers
/// are used heavily in the intermediate stages of the algorithm.
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
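///
/// Operands of each deleted instruction are themselves re-checked for
/// triviality, so whole chains of dead GEPs and bitcasts are removed in a
/// single pass.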
void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
  while (!DeadInsts.empty()) {
    Instruction *I = DeadInsts.pop_back_val();
    DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");

    I->replaceAllUsesWith(UndefValue::get(I->getType()));

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        // Zero out the operand and see if it becomes trivially dead.
        *OI = 0;
        if (isInstructionTriviallyDead(U))
          DeadInsts.insert(U);
      }

    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeletedAllocas.insert(AI);

    ++NumDeleted;
    I->eraseFromParent();
  }
}

/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// If there is a domtree available, we attempt to promote using the full power
/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
/// based on the SSAUpdater utilities. This function returns whether any
/// promotion occurred.
bool SROA::promoteAllocas(Function &F) {
  if (PromotableAllocas.empty())
    return false;

  NumPromoted += PromotableAllocas.size();

  if (DT && !ForceSSAUpdater) {
    DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
    PromoteMemToReg(PromotableAllocas, *DT);
    PromotableAllocas.clear();
    return true;
  }

  DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
  SSAUpdater SSA;
  DIBuilder DIB(*F.getParent());
  SmallVector<Instruction*, 64> Insts;

  for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
    AllocaInst *AI = PromotableAllocas[Idx];
    for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
         UI != UE;) {
      Instruction *I = cast<Instruction>(*UI++);
      // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
      // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
      // leading to them) here. Eventually it should use them to optimize the
      // scalar values produced.
      if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
        assert(onlyUsedByLifetimeMarkers(I) &&
               "Found a bitcast used outside of a lifetime marker.");
        while (!I->use_empty())
          cast<Instruction>(*I->use_begin())->eraseFromParent();
        I->eraseFromParent();
        continue;
      }
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
               II->getIntrinsicID() == Intrinsic::lifetime_end);
        II->eraseFromParent();
        continue;
      }

      Insts.push_back(I);
    }
    AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
    Insts.clear();
  }

  PromotableAllocas.clear();
  return true;
}

namespace {
  /// \brief A predicate to test whether an alloca belongs to a set.
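  /// Used with remove_if and std::remove_if below to purge deleted allocas
  /// from the worklists and the promotable list.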
  class IsAllocaInSet {
    typedef SmallPtrSet<AllocaInst *, 4> SetType;
    const SetType &Set;

  public:
    typedef AllocaInst *argument_type;

    IsAllocaInSet(const SetType &Set) : Set(Set) {}
    bool operator()(AllocaInst *AI) const { return Set.count(AI); }
  };
}

bool SROA::runOnFunction(Function &F) {
  DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
  C = &F.getContext();
  TD = getAnalysisIfAvailable<DataLayout>();
  if (!TD) {
    DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
    return false;
  }
  DT = getAnalysisIfAvailable<DominatorTree>();

  BasicBlock &EntryBB = F.getEntryBlock();
  for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
       I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      Worklist.insert(AI);

  bool Changed = false;
  // A set of deleted alloca instruction pointers which should be removed from
  // the list of promotable allocas.
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  do {
    while (!Worklist.empty()) {
      Changed |= runOnAlloca(*Worklist.pop_back_val());
      deleteDeadInstructions(DeletedAllocas);

      // Remove the deleted allocas from various lists so that we don't try to
      // continue processing them.
      if (!DeletedAllocas.empty()) {
        Worklist.remove_if(IsAllocaInSet(DeletedAllocas));
        PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas));
        PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
                                               PromotableAllocas.end(),
                                               IsAllocaInSet(DeletedAllocas)),
                                PromotableAllocas.end());
        DeletedAllocas.clear();
      }
    }

    Changed |= promoteAllocas(F);

    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  return Changed;
}

void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
  if (RequiresDomTree)
    AU.addRequired<DominatorTree>();
  AU.setPreservesCFG();
}