GlobalOpt.cpp revision 263508
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass transforms simple global variables that never have their address
11// taken.  If obviously true, it marks read/write globals as constant, deletes
12// variables only stored to, etc.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "globalopt"
17#include "llvm/Transforms/IPO.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallPtrSet.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/ConstantFolding.h"
24#include "llvm/Analysis/MemoryBuiltins.h"
25#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Instructions.h"
30#include "llvm/IR/IntrinsicInst.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/Operator.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/CallSite.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/GetElementPtrTypeIterator.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Support/ValueHandle.h"
41#include "llvm/Target/TargetLibraryInfo.h"
42#include "llvm/Transforms/Utils/GlobalStatus.h"
43#include "llvm/Transforms/Utils/ModuleUtils.h"
44#include <algorithm>
45using namespace llvm;
46
47STATISTIC(NumMarked    , "Number of globals marked constant");
48STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
49STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
50STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
51STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
52STATISTIC(NumDeleted   , "Number of globals deleted");
53STATISTIC(NumFnDeleted , "Number of functions deleted");
54STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
55STATISTIC(NumLocalized , "Number of globals localized");
56STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
57STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
58STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
59STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
60STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
61STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
62STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
63
64namespace {
65  struct GlobalOpt : public ModulePass {
66    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
67      AU.addRequired<TargetLibraryInfo>();
68    }
69    static char ID; // Pass identification, replacement for typeid
70    GlobalOpt() : ModulePass(ID) {
71      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
72    }
73
74    bool runOnModule(Module &M);
75
76  private:
77    GlobalVariable *FindGlobalCtors(Module &M);
78    bool OptimizeFunctions(Module &M);
79    bool OptimizeGlobalVars(Module &M);
80    bool OptimizeGlobalAliases(Module &M);
81    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
82    bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
83    bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
84                               const GlobalStatus &GS);
85    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
86
87    DataLayout *TD;
88    TargetLibraryInfo *TLI;
89  };
90}
91
92char GlobalOpt::ID = 0;
93INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
94                "Global Variable Optimizer", false, false)
95INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
96INITIALIZE_PASS_END(GlobalOpt, "globalopt",
97                "Global Variable Optimizer", false, false)
98
99ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
100
101/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
102/// as a root?  If so, we might not really want to eliminate the stores to it.
103static bool isLeakCheckerRoot(GlobalVariable *GV) {
104  // A global variable is a root if it is a pointer, or could plausibly contain
105  // a pointer.  There are two challenges; one is that we could have a struct
106  // the has an inner member which is a pointer.  We recurse through the type to
107  // detect these (up to a point).  The other is that we may actually be a union
108  // of a pointer and another type, and so our LLVM type is an integer which
109  // gets converted into a pointer, or our type is an [i8 x #] with a pointer
110  // potentially contained here.
111
112  if (GV->hasPrivateLinkage())
113    return false;
114
115  SmallVector<Type *, 4> Types;
116  Types.push_back(cast<PointerType>(GV->getType())->getElementType());
117
118  unsigned Limit = 20;
119  do {
120    Type *Ty = Types.pop_back_val();
121    switch (Ty->getTypeID()) {
122      default: break;
123      case Type::PointerTyID: return true;
124      case Type::ArrayTyID:
125      case Type::VectorTyID: {
126        SequentialType *STy = cast<SequentialType>(Ty);
127        Types.push_back(STy->getElementType());
128        break;
129      }
130      case Type::StructTyID: {
131        StructType *STy = cast<StructType>(Ty);
132        if (STy->isOpaque()) return true;
133        for (StructType::element_iterator I = STy->element_begin(),
134                 E = STy->element_end(); I != E; ++I) {
135          Type *InnerTy = *I;
136          if (isa<PointerType>(InnerTy)) return true;
137          if (isa<CompositeType>(InnerTy))
138            Types.push_back(InnerTy);
139        }
140        break;
141      }
142    }
143    if (--Limit == 0) return true;
144  } while (!Types.empty());
145  return false;
146}
147
148/// Given a value that is stored to a global but never read, determine whether
149/// it's safe to remove the store and the chain of computation that feeds the
150/// store.
151static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
152  do {
153    if (isa<Constant>(V))
154      return true;
155    if (!V->hasOneUse())
156      return false;
157    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
158        isa<GlobalValue>(V))
159      return false;
160    if (isAllocationFn(V, TLI))
161      return true;
162
163    Instruction *I = cast<Instruction>(V);
164    if (I->mayHaveSideEffects())
165      return false;
166    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
167      if (!GEP->hasAllConstantIndices())
168        return false;
169    } else if (I->getNumOperands() != 1) {
170      return false;
171    }
172
173    V = I->getOperand(0);
174  } while (1);
175}
176
177/// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
178/// of the global and clean up any that obviously don't assign the global a
179/// value that isn't dynamically allocated.
180///
181static bool CleanupPointerRootUsers(GlobalVariable *GV,
182                                    const TargetLibraryInfo *TLI) {
183  // A brief explanation of leak checkers.  The goal is to find bugs where
184  // pointers are forgotten, causing an accumulating growth in memory
185  // usage over time.  The common strategy for leak checkers is to whitelist the
186  // memory pointed to by globals at exit.  This is popular because it also
187  // solves another problem where the main thread of a C++ program may shut down
188  // before other threads that are still expecting to use those globals.  To
189  // handle that case, we expect the program may create a singleton and never
190  // destroy it.
191
192  bool Changed = false;
193
194  // If Dead[n].first is the only use of a malloc result, we can delete its
195  // chain of computation and the store to the global in Dead[n].second.
196  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
197
198  // Constants can't be pointers to dynamically allocated memory.
199  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
200       UI != E;) {
201    User *U = *UI++;
202    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
203      Value *V = SI->getValueOperand();
204      if (isa<Constant>(V)) {
205        Changed = true;
206        SI->eraseFromParent();
207      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
208        if (I->hasOneUse())
209          Dead.push_back(std::make_pair(I, SI));
210      }
211    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
212      if (isa<Constant>(MSI->getValue())) {
213        Changed = true;
214        MSI->eraseFromParent();
215      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
216        if (I->hasOneUse())
217          Dead.push_back(std::make_pair(I, MSI));
218      }
219    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
220      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
221      if (MemSrc && MemSrc->isConstant()) {
222        Changed = true;
223        MTI->eraseFromParent();
224      } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
225        if (I->hasOneUse())
226          Dead.push_back(std::make_pair(I, MTI));
227      }
228    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
229      if (CE->use_empty()) {
230        CE->destroyConstant();
231        Changed = true;
232      }
233    } else if (Constant *C = dyn_cast<Constant>(U)) {
234      if (isSafeToDestroyConstant(C)) {
235        C->destroyConstant();
236        // This could have invalidated UI, start over from scratch.
237        Dead.clear();
238        CleanupPointerRootUsers(GV, TLI);
239        return true;
240      }
241    }
242  }
243
244  for (int i = 0, e = Dead.size(); i != e; ++i) {
245    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
246      Dead[i].second->eraseFromParent();
247      Instruction *I = Dead[i].first;
248      do {
249        if (isAllocationFn(I, TLI))
250          break;
251        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
252        if (!J)
253          break;
254        I->eraseFromParent();
255        I = J;
256      } while (1);
257      I->eraseFromParent();
258    }
259  }
260
261  return Changed;
262}
263
264/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
265/// users of the global, cleaning up the obvious ones.  This is largely just a
266/// quick scan over the use list to clean up the easy and obvious cruft.  This
267/// returns true if it made a change.
268static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
269                                       DataLayout *TD, TargetLibraryInfo *TLI) {
270  bool Changed = false;
271  // Note that we need to use a weak value handle for the worklist items. When
272  // we delete a constant array, we may also be holding pointer to one of its
273  // elements (or an element of one of its elements if we're dealing with an
274  // array of arrays) in the worklist.
275  SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
276  while (!WorkList.empty()) {
277    Value *UV = WorkList.pop_back_val();
278    if (!UV)
279      continue;
280
281    User *U = cast<User>(UV);
282
283    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
284      if (Init) {
285        // Replace the load with the initializer.
286        LI->replaceAllUsesWith(Init);
287        LI->eraseFromParent();
288        Changed = true;
289      }
290    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
291      // Store must be unreachable or storing Init into the global.
292      SI->eraseFromParent();
293      Changed = true;
294    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
295      if (CE->getOpcode() == Instruction::GetElementPtr) {
296        Constant *SubInit = 0;
297        if (Init)
298          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
299        Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
300      } else if (CE->getOpcode() == Instruction::BitCast &&
301                 CE->getType()->isPointerTy()) {
302        // Pointer cast, delete any stores and memsets to the global.
303        Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
304      }
305
306      if (CE->use_empty()) {
307        CE->destroyConstant();
308        Changed = true;
309      }
310    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
311      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
312      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
313      // and will invalidate our notion of what Init is.
314      Constant *SubInit = 0;
315      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
316        ConstantExpr *CE =
317          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
318        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
319          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
320
321        // If the initializer is an all-null value and we have an inbounds GEP,
322        // we already know what the result of any load from that GEP is.
323        // TODO: Handle splats.
324        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
325          SubInit = Constant::getNullValue(GEP->getType()->getElementType());
326      }
327      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
328
329      if (GEP->use_empty()) {
330        GEP->eraseFromParent();
331        Changed = true;
332      }
333    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
334      if (MI->getRawDest() == V) {
335        MI->eraseFromParent();
336        Changed = true;
337      }
338
339    } else if (Constant *C = dyn_cast<Constant>(U)) {
340      // If we have a chain of dead constantexprs or other things dangling from
341      // us, and if they are all dead, nuke them without remorse.
342      if (isSafeToDestroyConstant(C)) {
343        C->destroyConstant();
344        CleanupConstantGlobalUsers(V, Init, TD, TLI);
345        return true;
346      }
347    }
348  }
349  return Changed;
350}
351
352/// isSafeSROAElementUse - Return true if the specified instruction is a safe
353/// user of a derived expression from a global that we want to SROA.
354static bool isSafeSROAElementUse(Value *V) {
355  // We might have a dead and dangling constant hanging off of here.
356  if (Constant *C = dyn_cast<Constant>(V))
357    return isSafeToDestroyConstant(C);
358
359  Instruction *I = dyn_cast<Instruction>(V);
360  if (!I) return false;
361
362  // Loads are ok.
363  if (isa<LoadInst>(I)) return true;
364
365  // Stores *to* the pointer are ok.
366  if (StoreInst *SI = dyn_cast<StoreInst>(I))
367    return SI->getOperand(0) != V;
368
369  // Otherwise, it must be a GEP.
370  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
371  if (GEPI == 0) return false;
372
373  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
374      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
375    return false;
376
377  for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
378       I != E; ++I)
379    if (!isSafeSROAElementUse(*I))
380      return false;
381  return true;
382}
383
384
385/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
386/// Look at it and its uses and decide whether it is safe to SROA this global.
387///
388static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
389  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
390  if (!isa<GetElementPtrInst>(U) &&
391      (!isa<ConstantExpr>(U) ||
392       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
393    return false;
394
395  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
396  // don't like < 3 operand CE's, and we don't like non-constant integer
397  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
398  // value of C.
399  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
400      !cast<Constant>(U->getOperand(1))->isNullValue() ||
401      !isa<ConstantInt>(U->getOperand(2)))
402    return false;
403
404  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
405  ++GEPI;  // Skip over the pointer index.
406
407  // If this is a use of an array allocation, do a bit more checking for sanity.
408  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
409    uint64_t NumElements = AT->getNumElements();
410    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
411
412    // Check to make sure that index falls within the array.  If not,
413    // something funny is going on, so we won't do the optimization.
414    //
415    if (Idx->getZExtValue() >= NumElements)
416      return false;
417
418    // We cannot scalar repl this level of the array unless any array
419    // sub-indices are in-range constants.  In particular, consider:
420    // A[0][i].  We cannot know that the user isn't doing invalid things like
421    // allowing i to index an out-of-range subscript that accesses A[1].
422    //
423    // Scalar replacing *just* the outer index of the array is probably not
424    // going to be a win anyway, so just give up.
425    for (++GEPI; // Skip array index.
426         GEPI != E;
427         ++GEPI) {
428      uint64_t NumElements;
429      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
430        NumElements = SubArrayTy->getNumElements();
431      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
432        NumElements = SubVectorTy->getNumElements();
433      else {
434        assert((*GEPI)->isStructTy() &&
435               "Indexed GEP type is not array, vector, or struct!");
436        continue;
437      }
438
439      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
440      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
441        return false;
442    }
443  }
444
445  for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
446    if (!isSafeSROAElementUse(*I))
447      return false;
448  return true;
449}
450
451/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
452/// is safe for us to perform this transformation.
453///
454static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
455  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
456       UI != E; ++UI) {
457    if (!IsUserOfGlobalSafeForSRA(*UI, GV))
458      return false;
459  }
460  return true;
461}
462
463
464/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
465/// variable.  This opens the door for other optimizations by exposing the
466/// behavior of the program in a more fine-grained way.  We have determined that
467/// this transformation is safe already.  We return the first global variable we
468/// insert so that the caller can reprocess it.
469static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
470  // Make sure this global only has simple uses that we can SRA.
471  if (!GlobalUsersSafeToSRA(GV))
472    return 0;
473
474  assert(GV->hasLocalLinkage() && !GV->isConstant());
475  Constant *Init = GV->getInitializer();
476  Type *Ty = Init->getType();
477
478  std::vector<GlobalVariable*> NewGlobals;
479  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
480
481  // Get the alignment of the global, either explicit or target-specific.
482  unsigned StartAlignment = GV->getAlignment();
483  if (StartAlignment == 0)
484    StartAlignment = TD.getABITypeAlignment(GV->getType());
485
486  if (StructType *STy = dyn_cast<StructType>(Ty)) {
487    NewGlobals.reserve(STy->getNumElements());
488    const StructLayout &Layout = *TD.getStructLayout(STy);
489    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
490      Constant *In = Init->getAggregateElement(i);
491      assert(In && "Couldn't get element of initializer?");
492      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
493                                               GlobalVariable::InternalLinkage,
494                                               In, GV->getName()+"."+Twine(i),
495                                               GV->getThreadLocalMode(),
496                                              GV->getType()->getAddressSpace());
497      Globals.insert(GV, NGV);
498      NewGlobals.push_back(NGV);
499
500      // Calculate the known alignment of the field.  If the original aggregate
501      // had 256 byte alignment for example, something might depend on that:
502      // propagate info to each field.
503      uint64_t FieldOffset = Layout.getElementOffset(i);
504      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
505      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
506        NGV->setAlignment(NewAlign);
507    }
508  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
509    unsigned NumElements = 0;
510    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
511      NumElements = ATy->getNumElements();
512    else
513      NumElements = cast<VectorType>(STy)->getNumElements();
514
515    if (NumElements > 16 && GV->hasNUsesOrMore(16))
516      return 0; // It's not worth it.
517    NewGlobals.reserve(NumElements);
518
519    uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
520    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
521    for (unsigned i = 0, e = NumElements; i != e; ++i) {
522      Constant *In = Init->getAggregateElement(i);
523      assert(In && "Couldn't get element of initializer?");
524
525      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
526                                               GlobalVariable::InternalLinkage,
527                                               In, GV->getName()+"."+Twine(i),
528                                               GV->getThreadLocalMode(),
529                                              GV->getType()->getAddressSpace());
530      Globals.insert(GV, NGV);
531      NewGlobals.push_back(NGV);
532
533      // Calculate the known alignment of the field.  If the original aggregate
534      // had 256 byte alignment for example, something might depend on that:
535      // propagate info to each field.
536      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
537      if (NewAlign > EltAlign)
538        NGV->setAlignment(NewAlign);
539    }
540  }
541
542  if (NewGlobals.empty())
543    return 0;
544
545  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
546
547  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
548
549  // Loop over all of the uses of the global, replacing the constantexpr geps,
550  // with smaller constantexpr geps or direct references.
551  while (!GV->use_empty()) {
552    User *GEP = GV->use_back();
553    assert(((isa<ConstantExpr>(GEP) &&
554             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
555            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
556
557    // Ignore the 1th operand, which has to be zero or else the program is quite
558    // broken (undefined).  Get the 2nd operand, which is the structure or array
559    // index.
560    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
561    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
562
563    Value *NewPtr = NewGlobals[Val];
564
565    // Form a shorter GEP if needed.
566    if (GEP->getNumOperands() > 3) {
567      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
568        SmallVector<Constant*, 8> Idxs;
569        Idxs.push_back(NullInt);
570        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
571          Idxs.push_back(CE->getOperand(i));
572        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
573      } else {
574        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
575        SmallVector<Value*, 8> Idxs;
576        Idxs.push_back(NullInt);
577        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
578          Idxs.push_back(GEPI->getOperand(i));
579        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
580                                           GEPI->getName()+"."+Twine(Val),GEPI);
581      }
582    }
583    GEP->replaceAllUsesWith(NewPtr);
584
585    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
586      GEPI->eraseFromParent();
587    else
588      cast<ConstantExpr>(GEP)->destroyConstant();
589  }
590
591  // Delete the old global, now that it is dead.
592  Globals.erase(GV);
593  ++NumSRA;
594
595  // Loop over the new globals array deleting any globals that are obviously
596  // dead.  This can arise due to scalarization of a structure or an array that
597  // has elements that are dead.
598  unsigned FirstGlobal = 0;
599  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
600    if (NewGlobals[i]->use_empty()) {
601      Globals.erase(NewGlobals[i]);
602      if (FirstGlobal == i) ++FirstGlobal;
603    }
604
605  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
606}
607
608/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
609/// value will trap if the value is dynamically null.  PHIs keeps track of any
610/// phi nodes we've seen to avoid reprocessing them.
611static bool AllUsesOfValueWillTrapIfNull(const Value *V,
612                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
613  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
614       ++UI) {
615    const User *U = *UI;
616
617    if (isa<LoadInst>(U)) {
618      // Will trap.
619    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
620      if (SI->getOperand(0) == V) {
621        //cerr << "NONTRAPPING USE: " << *U;
622        return false;  // Storing the value.
623      }
624    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
625      if (CI->getCalledValue() != V) {
626        //cerr << "NONTRAPPING USE: " << *U;
627        return false;  // Not calling the ptr
628      }
629    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
630      if (II->getCalledValue() != V) {
631        //cerr << "NONTRAPPING USE: " << *U;
632        return false;  // Not calling the ptr
633      }
634    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
635      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
636    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
637      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
638    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
639      // If we've already seen this phi node, ignore it, it has already been
640      // checked.
641      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
642        return false;
643    } else if (isa<ICmpInst>(U) &&
644               isa<ConstantPointerNull>(UI->getOperand(1))) {
645      // Ignore icmp X, null
646    } else {
647      //cerr << "NONTRAPPING USE: " << *U;
648      return false;
649    }
650  }
651  return true;
652}
653
654/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
655/// from GV will trap if the loaded value is null.  Note that this also permits
656/// comparisons of the loaded value against null, as a special case.
657static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
658  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
659       UI != E; ++UI) {
660    const User *U = *UI;
661
662    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
663      SmallPtrSet<const PHINode*, 8> PHIs;
664      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
665        return false;
666    } else if (isa<StoreInst>(U)) {
667      // Ignore stores to the global.
668    } else {
669      // We don't know or understand this user, bail out.
670      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
671      return false;
672    }
673  }
674  return true;
675}
676
677static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
678  bool Changed = false;
679  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
680    Instruction *I = cast<Instruction>(*UI++);
681    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
682      LI->setOperand(0, NewV);
683      Changed = true;
684    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
685      if (SI->getOperand(1) == V) {
686        SI->setOperand(1, NewV);
687        Changed = true;
688      }
689    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
690      CallSite CS(I);
691      if (CS.getCalledValue() == V) {
692        // Calling through the pointer!  Turn into a direct call, but be careful
693        // that the pointer is not also being passed as an argument.
694        CS.setCalledFunction(NewV);
695        Changed = true;
696        bool PassedAsArg = false;
697        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
698          if (CS.getArgument(i) == V) {
699            PassedAsArg = true;
700            CS.setArgument(i, NewV);
701          }
702
703        if (PassedAsArg) {
704          // Being passed as an argument also.  Be careful to not invalidate UI!
705          UI = V->use_begin();
706        }
707      }
708    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
709      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
710                                ConstantExpr::getCast(CI->getOpcode(),
711                                                      NewV, CI->getType()));
712      if (CI->use_empty()) {
713        Changed = true;
714        CI->eraseFromParent();
715      }
716    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
717      // Should handle GEP here.
718      SmallVector<Constant*, 8> Idxs;
719      Idxs.reserve(GEPI->getNumOperands()-1);
720      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
721           i != e; ++i)
722        if (Constant *C = dyn_cast<Constant>(*i))
723          Idxs.push_back(C);
724        else
725          break;
726      if (Idxs.size() == GEPI->getNumOperands()-1)
727        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
728                          ConstantExpr::getGetElementPtr(NewV, Idxs));
729      if (GEPI->use_empty()) {
730        Changed = true;
731        GEPI->eraseFromParent();
732      }
733    }
734  }
735
736  return Changed;
737}
738
739
740/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
741/// value stored into it.  If there are uses of the loaded value that would trap
742/// if the loaded value is dynamically null, then we know that they cannot be
743/// reachable with a null optimize away the load.
744static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
745                                            DataLayout *TD,
746                                            TargetLibraryInfo *TLI) {
747  bool Changed = false;
748
749  // Keep track of whether we are able to remove all the uses of the global
750  // other than the store that defines it.
751  bool AllNonStoreUsesGone = true;
752
753  // Replace all uses of loads with uses of uses of the stored value.
754  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
755    User *GlobalUser = *GUI++;
756    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
757      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
758      // If we were able to delete all uses of the loads
759      if (LI->use_empty()) {
760        LI->eraseFromParent();
761        Changed = true;
762      } else {
763        AllNonStoreUsesGone = false;
764      }
765    } else if (isa<StoreInst>(GlobalUser)) {
766      // Ignore the store that stores "LV" to the global.
767      assert(GlobalUser->getOperand(1) == GV &&
768             "Must be storing *to* the global");
769    } else {
770      AllNonStoreUsesGone = false;
771
772      // If we get here we could have other crazy uses that are transitively
773      // loaded.
774      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
775              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
776              isa<BitCastInst>(GlobalUser) ||
777              isa<GetElementPtrInst>(GlobalUser)) &&
778             "Only expect load and stores!");
779    }
780  }
781
782  if (Changed) {
783    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
784    ++NumGlobUses;
785  }
786
787  // If we nuked all of the loads, then none of the stores are needed either,
788  // nor is the global.
789  if (AllNonStoreUsesGone) {
790    if (isLeakCheckerRoot(GV)) {
791      Changed |= CleanupPointerRootUsers(GV, TLI);
792    } else {
793      Changed = true;
794      CleanupConstantGlobalUsers(GV, 0, TD, TLI);
795    }
796    if (GV->use_empty()) {
797      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
798      Changed = true;
799      GV->eraseFromParent();
800      ++NumDeleted;
801    }
802  }
803  return Changed;
804}
805
806/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
807/// instructions that are foldable.
808static void ConstantPropUsersOf(Value *V,
809                                DataLayout *TD, TargetLibraryInfo *TLI) {
810  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
811    if (Instruction *I = dyn_cast<Instruction>(*UI++))
812      if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
813        I->replaceAllUsesWith(NewC);
814
815        // Advance UI to the next non-I use to avoid invalidating it!
816        // Instructions could multiply use V.
817        while (UI != E && *UI == I)
818          ++UI;
819        I->eraseFromParent();
820      }
821}
822
823/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
824/// variable, and transforms the program as if it always contained the result of
825/// the specified malloc.  Because it is always the result of the specified
826/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
827/// malloc into a global, and any loads of GV as uses of the new global.
828static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
829                                                     CallInst *CI,
830                                                     Type *AllocTy,
831                                                     ConstantInt *NElements,
832                                                     DataLayout *TD,
833                                                     TargetLibraryInfo *TLI) {
834  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
835
836  Type *GlobalType;
837  if (NElements->getZExtValue() == 1)
838    GlobalType = AllocTy;
839  else
840    // If we have an array allocation, the global variable is of an array.
841    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
842
843  // Create the new global variable.  The contents of the malloc'd memory is
844  // undefined, so initialize with an undef value.
845  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
846                                             GlobalType, false,
847                                             GlobalValue::InternalLinkage,
848                                             UndefValue::get(GlobalType),
849                                             GV->getName()+".body",
850                                             GV,
851                                             GV->getThreadLocalMode());
852
853  // If there are bitcast users of the malloc (which is typical, usually we have
854  // a malloc + bitcast) then replace them with uses of the new global.  Update
855  // other users to use the global as well.
856  BitCastInst *TheBC = 0;
857  while (!CI->use_empty()) {
858    Instruction *User = cast<Instruction>(CI->use_back());
859    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
860      if (BCI->getType() == NewGV->getType()) {
861        BCI->replaceAllUsesWith(NewGV);
862        BCI->eraseFromParent();
863      } else {
864        BCI->setOperand(0, NewGV);
865      }
866    } else {
867      if (TheBC == 0)
868        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
869      User->replaceUsesOfWith(CI, TheBC);
870    }
871  }
872
873  Constant *RepValue = NewGV;
874  if (NewGV->getType() != GV->getType()->getElementType())
875    RepValue = ConstantExpr::getBitCast(RepValue,
876                                        GV->getType()->getElementType());
877
878  // If there is a comparison against null, we will insert a global bool to
879  // keep track of whether the global was initialized yet or not.
880  GlobalVariable *InitBool =
881    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
882                       GlobalValue::InternalLinkage,
883                       ConstantInt::getFalse(GV->getContext()),
884                       GV->getName()+".init", GV->getThreadLocalMode());
885  bool InitBoolUsed = false;
886
887  // Loop over all uses of GV, processing them in turn.
888  while (!GV->use_empty()) {
889    if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
890      // The global is initialized when the store to it occurs.
891      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
892                    SI->getOrdering(), SI->getSynchScope(), SI);
893      SI->eraseFromParent();
894      continue;
895    }
896
897    LoadInst *LI = cast<LoadInst>(GV->use_back());
898    while (!LI->use_empty()) {
899      Use &LoadUse = LI->use_begin().getUse();
900      if (!isa<ICmpInst>(LoadUse.getUser())) {
901        LoadUse = RepValue;
902        continue;
903      }
904
905      ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
906      // Replace the cmp X, 0 with a use of the bool value.
907      // Sink the load to where the compare was, if atomic rules allow us to.
908      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
909                               LI->getOrdering(), LI->getSynchScope(),
910                               LI->isUnordered() ? (Instruction*)ICI : LI);
911      InitBoolUsed = true;
912      switch (ICI->getPredicate()) {
913      default: llvm_unreachable("Unknown ICmp Predicate!");
914      case ICmpInst::ICMP_ULT:
915      case ICmpInst::ICMP_SLT:   // X < null -> always false
916        LV = ConstantInt::getFalse(GV->getContext());
917        break;
918      case ICmpInst::ICMP_ULE:
919      case ICmpInst::ICMP_SLE:
920      case ICmpInst::ICMP_EQ:
921        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
922        break;
923      case ICmpInst::ICMP_NE:
924      case ICmpInst::ICMP_UGE:
925      case ICmpInst::ICMP_SGE:
926      case ICmpInst::ICMP_UGT:
927      case ICmpInst::ICMP_SGT:
928        break;  // no change.
929      }
930      ICI->replaceAllUsesWith(LV);
931      ICI->eraseFromParent();
932    }
933    LI->eraseFromParent();
934  }
935
936  // If the initialization boolean was used, insert it, otherwise delete it.
937  if (!InitBoolUsed) {
938    while (!InitBool->use_empty())  // Delete initializations
939      cast<StoreInst>(InitBool->use_back())->eraseFromParent();
940    delete InitBool;
941  } else
942    GV->getParent()->getGlobalList().insert(GV, InitBool);
943
944  // Now the GV is dead, nuke it and the malloc..
945  GV->eraseFromParent();
946  CI->eraseFromParent();
947
948  // To further other optimizations, loop over all users of NewGV and try to
949  // constant prop them.  This will promote GEP instructions with constant
950  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
951  ConstantPropUsersOf(NewGV, TD, TLI);
952  if (RepValue != NewGV)
953    ConstantPropUsersOf(RepValue, TD, TLI);
954
955  return NewGV;
956}
957
958/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
959/// to make sure that there are no complex uses of V.  We permit simple things
960/// like dereferencing the pointer, but not storing through the address, unless
961/// it is to the specified global.
962static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
963                                                      const GlobalVariable *GV,
964                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
965  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
966       UI != E; ++UI) {
967    const Instruction *Inst = cast<Instruction>(*UI);
968
969    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
970      continue; // Fine, ignore.
971    }
972
973    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
974      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
975        return false;  // Storing the pointer itself... bad.
976      continue; // Otherwise, storing through it, or storing into GV... fine.
977    }
978
979    // Must index into the array and into the struct.
980    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
981      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
982        return false;
983      continue;
984    }
985
986    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
987      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
988      // cycles.
989      if (PHIs.insert(PN))
990        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
991          return false;
992      continue;
993    }
994
995    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
996      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
997        return false;
998      continue;
999    }
1000
1001    return false;
1002  }
1003  return true;
1004}
1005
1006/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
1007/// somewhere.  Transform all uses of the allocation into loads from the
1008/// global and uses of the resultant pointer.  Further, delete the store into
1009/// GV.  This assumes that these value pass the
1010/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
1011static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
1012                                          GlobalVariable *GV) {
1013  while (!Alloc->use_empty()) {
1014    Instruction *U = cast<Instruction>(*Alloc->use_begin());
1015    Instruction *InsertPt = U;
1016    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1017      // If this is the store of the allocation into the global, remove it.
1018      if (SI->getOperand(1) == GV) {
1019        SI->eraseFromParent();
1020        continue;
1021      }
1022    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
1023      // Insert the load in the corresponding predecessor, not right before the
1024      // PHI.
1025      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
1026    } else if (isa<BitCastInst>(U)) {
1027      // Must be bitcast between the malloc and store to initialize the global.
1028      ReplaceUsesOfMallocWithGlobal(U, GV);
1029      U->eraseFromParent();
1030      continue;
1031    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1032      // If this is a "GEP bitcast" and the user is a store to the global, then
1033      // just process it as a bitcast.
1034      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
1035        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
1036          if (SI->getOperand(1) == GV) {
1037            // Must be bitcast GEP between the malloc and store to initialize
1038            // the global.
1039            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
1040            GEPI->eraseFromParent();
1041            continue;
1042          }
1043    }
1044
1045    // Insert a load from the global, and use it instead of the malloc.
1046    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
1047    U->replaceUsesOfWith(Alloc, NL);
1048  }
1049}
1050
1051/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
1052/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
1053/// that index through the array and struct field, icmps of null, and PHIs.
1054static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
1055                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
1056                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
1057  // We permit two users of the load: setcc comparing against the null
1058  // pointer, and a getelementptr of a specific form.
1059  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
1060       ++UI) {
1061    const Instruction *User = cast<Instruction>(*UI);
1062
1063    // Comparison against null is ok.
1064    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
1065      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
1066        return false;
1067      continue;
1068    }
1069
1070    // getelementptr is also ok, but only a simple form.
1071    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1072      // Must index into the array and into the struct.
1073      if (GEPI->getNumOperands() < 3)
1074        return false;
1075
1076      // Otherwise the GEP is ok.
1077      continue;
1078    }
1079
1080    if (const PHINode *PN = dyn_cast<PHINode>(User)) {
1081      if (!LoadUsingPHIsPerLoad.insert(PN))
1082        // This means some phi nodes are dependent on each other.
1083        // Avoid infinite looping!
1084        return false;
1085      if (!LoadUsingPHIs.insert(PN))
1086        // If we have already analyzed this PHI, then it is safe.
1087        continue;
1088
1089      // Make sure all uses of the PHI are simple enough to transform.
1090      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
1091                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
1092        return false;
1093
1094      continue;
1095    }
1096
1097    // Otherwise we don't know what this is, not ok.
1098    return false;
1099  }
1100
1101  return true;
1102}
1103
1104
1105/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
1106/// GV are simple enough to perform HeapSRA, return true.
1107static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
1108                                                    Instruction *StoredVal) {
1109  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
1110  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
1111  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
1112       UI != E; ++UI)
1113    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
1114      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
1115                                          LoadUsingPHIsPerLoad))
1116        return false;
1117      LoadUsingPHIsPerLoad.clear();
1118    }
1119
1120  // If we reach here, we know that all uses of the loads and transitive uses
1121  // (through PHI nodes) are simple enough to transform.  However, we don't know
1122  // that all inputs the to the PHI nodes are in the same equivalence sets.
1123  // Check to verify that all operands of the PHIs are either PHIS that can be
1124  // transformed, loads from GV, or MI itself.
1125  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
1126       , E = LoadUsingPHIs.end(); I != E; ++I) {
1127    const PHINode *PN = *I;
1128    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
1129      Value *InVal = PN->getIncomingValue(op);
1130
1131      // PHI of the stored value itself is ok.
1132      if (InVal == StoredVal) continue;
1133
1134      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
1135        // One of the PHIs in our set is (optimistically) ok.
1136        if (LoadUsingPHIs.count(InPN))
1137          continue;
1138        return false;
1139      }
1140
1141      // Load from GV is ok.
1142      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
1143        if (LI->getOperand(0) == GV)
1144          continue;
1145
1146      // UNDEF? NULL?
1147
1148      // Anything else is rejected.
1149      return false;
1150    }
1151  }
1152
1153  return true;
1154}
1155
1156static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
1157               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
1158                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
1159  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
1160
1161  if (FieldNo >= FieldVals.size())
1162    FieldVals.resize(FieldNo+1);
1163
1164  // If we already have this value, just reuse the previously scalarized
1165  // version.
1166  if (Value *FieldVal = FieldVals[FieldNo])
1167    return FieldVal;
1168
1169  // Depending on what instruction this is, we have several cases.
1170  Value *Result;
1171  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
1172    // This is a scalarized version of the load from the global.  Just create
1173    // a new Load of the scalarized global.
1174    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
1175                                           InsertedScalarizedValues,
1176                                           PHIsToRewrite),
1177                          LI->getName()+".f"+Twine(FieldNo), LI);
1178  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
1179    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
1180    // field.
1181    StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
1182
1183    PHINode *NewPN =
1184     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
1185                     PN->getNumIncomingValues(),
1186                     PN->getName()+".f"+Twine(FieldNo), PN);
1187    Result = NewPN;
1188    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
1189  } else {
1190    llvm_unreachable("Unknown usable value");
1191  }
1192
1193  return FieldVals[FieldNo] = Result;
1194}
1195
1196/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
1197/// the load, rewrite the derived value to use the HeapSRoA'd load.
1198static void RewriteHeapSROALoadUser(Instruction *LoadUser,
1199             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
1200                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
1201  // If this is a comparison against null, handle it.
1202  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
1203    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
1204    // If we have a setcc of the loaded pointer, we can use a setcc of any
1205    // field.
1206    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
1207                                   InsertedScalarizedValues, PHIsToRewrite);
1208
1209    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
1210                              Constant::getNullValue(NPtr->getType()),
1211                              SCI->getName());
1212    SCI->replaceAllUsesWith(New);
1213    SCI->eraseFromParent();
1214    return;
1215  }
1216
1217  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
1218  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
1219    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
1220           && "Unexpected GEPI!");
1221
1222    // Load the pointer for this field.
1223    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
1224    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
1225                                     InsertedScalarizedValues, PHIsToRewrite);
1226
1227    // Create the new GEP idx vector.
1228    SmallVector<Value*, 8> GEPIdx;
1229    GEPIdx.push_back(GEPI->getOperand(1));
1230    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
1231
1232    Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
1233                                             GEPI->getName(), GEPI);
1234    GEPI->replaceAllUsesWith(NGEPI);
1235    GEPI->eraseFromParent();
1236    return;
1237  }
1238
1239  // Recursively transform the users of PHI nodes.  This will lazily create the
1240  // PHIs that are needed for individual elements.  Keep track of what PHIs we
1241  // see in InsertedScalarizedValues so that we don't get infinite loops (very
1242  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
1243  // already been seen first by another load, so its uses have already been
1244  // processed.
1245  PHINode *PN = cast<PHINode>(LoadUser);
1246  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
1247                                              std::vector<Value*>())).second)
1248    return;
1249
1250  // If this is the first time we've seen this PHI, recursively process all
1251  // users.
1252  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
1253    Instruction *User = cast<Instruction>(*UI++);
1254    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1255  }
1256}
1257
1258/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
1259/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
1260/// use FieldGlobals instead.  All uses of loaded values satisfy
1261/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
1262static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
1263               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
1264                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
1265  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
1266       UI != E; ) {
1267    Instruction *User = cast<Instruction>(*UI++);
1268    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
1269  }
1270
1271  if (Load->use_empty()) {
1272    Load->eraseFromParent();
1273    InsertedScalarizedValues.erase(Load);
1274  }
1275}
1276
1277/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
1278/// it up into multiple allocations of arrays of the fields.
1279static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1280                                            Value *NElems, DataLayout *TD,
1281                                            const TargetLibraryInfo *TLI) {
1282  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
1283  Type *MAT = getMallocAllocatedType(CI, TLI);
1284  StructType *STy = cast<StructType>(MAT);
1285
1286  // There is guaranteed to be at least one use of the malloc (storing
1287  // it into GV).  If there are other uses, change them to be uses of
1288  // the global to simplify later code.  This also deletes the store
1289  // into GV.
1290  ReplaceUsesOfMallocWithGlobal(CI, GV);
1291
1292  // Okay, at this point, there are no users of the malloc.  Insert N
1293  // new mallocs at the same place as CI, and N globals.
1294  std::vector<Value*> FieldGlobals;
1295  std::vector<Value*> FieldMallocs;
1296
1297  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1298    Type *FieldTy = STy->getElementType(FieldNo);
1299    PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
1300
1301    GlobalVariable *NGV =
1302      new GlobalVariable(*GV->getParent(),
1303                         PFieldTy, false, GlobalValue::InternalLinkage,
1304                         Constant::getNullValue(PFieldTy),
1305                         GV->getName() + ".f" + Twine(FieldNo), GV,
1306                         GV->getThreadLocalMode());
1307    FieldGlobals.push_back(NGV);
1308
1309    unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
1310    if (StructType *ST = dyn_cast<StructType>(FieldTy))
1311      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
1312    Type *IntPtrTy = TD->getIntPtrType(CI->getType());
1313    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1314                                        ConstantInt::get(IntPtrTy, TypeSize),
1315                                        NElems, 0,
1316                                        CI->getName() + ".f" + Twine(FieldNo));
1317    FieldMallocs.push_back(NMI);
1318    new StoreInst(NMI, NGV, CI);
1319  }
1320
1321  // The tricky aspect of this transformation is handling the case when malloc
1322  // fails.  In the original code, malloc failing would set the result pointer
1323  // of malloc to null.  In this case, some mallocs could succeed and others
1324  // could fail.  As such, we emit code that looks like this:
1325  //    F0 = malloc(field0)
1326  //    F1 = malloc(field1)
1327  //    F2 = malloc(field2)
1328  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
1329  //      if (F0) { free(F0); F0 = 0; }
1330  //      if (F1) { free(F1); F1 = 0; }
1331  //      if (F2) { free(F2); F2 = 0; }
1332  //    }
1333  // The malloc can also fail if its argument is too large.
1334  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1335  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1336                                  ConstantZero, "isneg");
1337  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1338    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1339                             Constant::getNullValue(FieldMallocs[i]->getType()),
1340                               "isnull");
1341    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1342  }
1343
1344  // Split the basic block at the old malloc.
1345  BasicBlock *OrigBB = CI->getParent();
1346  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
1347
1348  // Create the block to check the first condition.  Put all these blocks at the
1349  // end of the function as they are unlikely to be executed.
1350  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1351                                                "malloc_ret_null",
1352                                                OrigBB->getParent());
1353
1354  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1355  // branch on RunningOr.
1356  OrigBB->getTerminator()->eraseFromParent();
1357  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1358
1359  // Within the NullPtrBlock, we need to emit a comparison and branch for each
1360  // pointer, because some may be null while others are not.
1361  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1362    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
1363    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1364                              Constant::getNullValue(GVVal->getType()));
1365    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1366                                               OrigBB->getParent());
1367    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1368                                               OrigBB->getParent());
1369    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1370                                         Cmp, NullPtrBlock);
1371
1372    // Fill in FreeBlock.
1373    CallInst::CreateFree(GVVal, BI);
1374    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1375                  FreeBlock);
1376    BranchInst::Create(NextBlock, FreeBlock);
1377
1378    NullPtrBlock = NextBlock;
1379  }
1380
1381  BranchInst::Create(ContBB, NullPtrBlock);
1382
1383  // CI is no longer needed, remove it.
1384  CI->eraseFromParent();
1385
1386  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
1387  /// update all uses of the load, keep track of what scalarized loads are
1388  /// inserted for a given load.
1389  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
1390  InsertedScalarizedValues[GV] = FieldGlobals;
1391
1392  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
1393
1394  // Okay, the malloc site is completely handled.  All of the uses of GV are now
1395  // loads, and all uses of those loads are simple.  Rewrite them to use loads
1396  // of the per-field globals instead.
1397  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
1398    Instruction *User = cast<Instruction>(*UI++);
1399
1400    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1401      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1402      continue;
1403    }
1404
1405    // Must be a store of null.
1406    StoreInst *SI = cast<StoreInst>(User);
1407    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1408           "Unexpected heap-sra user!");
1409
1410    // Insert a store of null into each global.
1411    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1412      PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
1413      Constant *Null = Constant::getNullValue(PT->getElementType());
1414      new StoreInst(Null, FieldGlobals[i], SI);
1415    }
1416    // Erase the original store.
1417    SI->eraseFromParent();
1418  }
1419
1420  // While we have PHIs that are interesting to rewrite, do it.
1421  while (!PHIsToRewrite.empty()) {
1422    PHINode *PN = PHIsToRewrite.back().first;
1423    unsigned FieldNo = PHIsToRewrite.back().second;
1424    PHIsToRewrite.pop_back();
1425    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
1426    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
1427
1428    // Add all the incoming values.  This can materialize more phis.
1429    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1430      Value *InVal = PN->getIncomingValue(i);
1431      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1432                               PHIsToRewrite);
1433      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1434    }
1435  }
1436
1437  // Drop all inter-phi links and any loads that made it this far.
1438  for (DenseMap<Value*, std::vector<Value*> >::iterator
1439       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1440       I != E; ++I) {
1441    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1442      PN->dropAllReferences();
1443    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1444      LI->dropAllReferences();
1445  }
1446
1447  // Delete all the phis and loads now that inter-references are dead.
1448  for (DenseMap<Value*, std::vector<Value*> >::iterator
1449       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1450       I != E; ++I) {
1451    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1452      PN->eraseFromParent();
1453    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1454      LI->eraseFromParent();
1455  }
1456
1457  // The old global is now dead, remove it.
1458  GV->eraseFromParent();
1459
1460  ++NumHeapSRA;
1461  return cast<GlobalVariable>(FieldGlobals[0]);
1462}
1463
1464/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
1465/// pointer global variable with a single value stored it that is a malloc or
1466/// cast of malloc.
1467static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
1468                                               CallInst *CI,
1469                                               Type *AllocTy,
1470                                               AtomicOrdering Ordering,
1471                                               Module::global_iterator &GVI,
1472                                               DataLayout *TD,
1473                                               TargetLibraryInfo *TLI) {
1474  if (!TD)
1475    return false;
1476
1477  // If this is a malloc of an abstract type, don't touch it.
1478  if (!AllocTy->isSized())
1479    return false;
1480
1481  // We can't optimize this global unless all uses of it are *known* to be
1482  // of the malloc value, not of the null initializer value (consider a use
1483  // that compares the global's value against zero to see if the malloc has
1484  // been reached).  To do this, we check to see if all uses of the global
1485  // would trap if the global were null: this proves that they must all
1486  // happen after the malloc.
1487  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1488    return false;
1489
1490  // We can't optimize this if the malloc itself is used in a complex way,
1491  // for example, being stored into multiple globals.  This allows the
1492  // malloc to be stored into the specified global, loaded icmp'd, and
1493  // GEP'd.  These are all things we could transform to using the global
1494  // for.
1495  SmallPtrSet<const PHINode*, 8> PHIs;
1496  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1497    return false;
1498
1499  // If we have a global that is only initialized with a fixed size malloc,
1500  // transform the program to use global memory instead of malloc'd memory.
1501  // This eliminates dynamic allocation, avoids an indirection accessing the
1502  // data, and exposes the resultant global to further GlobalOpt.
1503  // We cannot optimize the malloc if we cannot determine malloc array size.
1504  Value *NElems = getMallocArraySize(CI, TD, TLI, true);
1505  if (!NElems)
1506    return false;
1507
1508  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1509    // Restrict this transformation to only working on small allocations
1510    // (2048 bytes currently), as we don't want to introduce a 16M global or
1511    // something.
1512    if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
1513      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
1514      return true;
1515    }
1516
1517  // If the allocation is an array of structures, consider transforming this
1518  // into multiple malloc'd arrays, one for each field.  This is basically
1519  // SRoA for malloc'd memory.
1520
1521  if (Ordering != NotAtomic)
1522    return false;
1523
1524  // If this is an allocation of a fixed size array of structs, analyze as a
1525  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
1526  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1527    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1528      AllocTy = AT->getElementType();
1529
1530  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1531  if (!AllocSTy)
1532    return false;
1533
1534  // This the structure has an unreasonable number of fields, leave it
1535  // alone.
1536  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1537      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1538
1539    // If this is a fixed size array, transform the Malloc to be an alloc of
1540    // structs.  malloc [100 x struct],1 -> malloc struct, 100
1541    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1542      Type *IntPtrTy = TD->getIntPtrType(CI->getType());
1543      unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
1544      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1545      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1546      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
1547                                                   AllocSize, NumElements,
1548                                                   0, CI->getName());
1549      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1550      CI->replaceAllUsesWith(Cast);
1551      CI->eraseFromParent();
1552      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1553        CI = cast<CallInst>(BCI->getOperand(0));
1554      else
1555        CI = cast<CallInst>(Malloc);
1556    }
1557
1558    GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
1559                               TD, TLI);
1560    return true;
1561  }
1562
1563  return false;
1564}
1565
1566// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
1567// that only one value (besides its initializer) is ever stored to the global.
1568static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1569                                     AtomicOrdering Ordering,
1570                                     Module::global_iterator &GVI,
1571                                     DataLayout *TD, TargetLibraryInfo *TLI) {
1572  // Ignore no-op GEPs and bitcasts.
1573  StoredOnceVal = StoredOnceVal->stripPointerCasts();
1574
1575  // If we are dealing with a pointer global that is initialized to null and
1576  // only has one (non-null) value stored into it, then we can optimize any
1577  // users of the loaded value (often calls and loads) that would trap if the
1578  // value was null.
1579  if (GV->getInitializer()->getType()->isPointerTy() &&
1580      GV->getInitializer()->isNullValue()) {
1581    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1582      if (GV->getInitializer()->getType() != SOVC->getType())
1583        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1584
1585      // Optimize away any trapping uses of the loaded value.
1586      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
1587        return true;
1588    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
1589      Type *MallocType = getMallocAllocatedType(CI, TLI);
1590      if (MallocType &&
1591          TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
1592                                             TD, TLI))
1593        return true;
1594    }
1595  }
1596
1597  return false;
1598}
1599
1600/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
1601/// two values ever stored into GV are its initializer and OtherVal.  See if we
1602/// can shrink the global into a boolean and select between the two values
1603/// whenever it is used.  This exposes the values to other scalar optimizations.
1604static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1605  Type *GVElType = GV->getType()->getElementType();
1606
1607  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1608  // an FP value, pointer or vector, don't do this optimization because a select
1609  // between them is very expensive and unlikely to lead to later
1610  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1611  // where v1 and v2 both require constant pool loads, a big loss.
1612  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1613      GVElType->isFloatingPointTy() ||
1614      GVElType->isPointerTy() || GVElType->isVectorTy())
1615    return false;
1616
1617  // Walk the use list of the global seeing if all the uses are load or store.
1618  // If there is anything else, bail out.
1619  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
1620    User *U = *I;
1621    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1622      return false;
1623  }
1624
1625  DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
1626
1627  // Create the new global, initializing it to false.
1628  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1629                                             false,
1630                                             GlobalValue::InternalLinkage,
1631                                        ConstantInt::getFalse(GV->getContext()),
1632                                             GV->getName()+".b",
1633                                             GV->getThreadLocalMode(),
1634                                             GV->getType()->getAddressSpace());
1635  GV->getParent()->getGlobalList().insert(GV, NewGV);
1636
1637  Constant *InitVal = GV->getInitializer();
1638  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1639         "No reason to shrink to bool!");
1640
1641  // If initialized to zero and storing one into the global, we can use a cast
1642  // instead of a select to synthesize the desired value.
1643  bool IsOneZero = false;
1644  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
1645    IsOneZero = InitVal->isNullValue() && CI->isOne();
1646
1647  while (!GV->use_empty()) {
1648    Instruction *UI = cast<Instruction>(GV->use_back());
1649    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1650      // Change the store into a boolean store.
1651      bool StoringOther = SI->getOperand(0) == OtherVal;
1652      // Only do this if we weren't storing a loaded value.
1653      Value *StoreVal;
1654      if (StoringOther || SI->getOperand(0) == InitVal) {
1655        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1656                                    StoringOther);
1657      } else {
1658        // Otherwise, we are storing a previously loaded copy.  To do this,
1659        // change the copy from copying the original value to just copying the
1660        // bool.
1661        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1662
1663        // If we've already replaced the input, StoredVal will be a cast or
1664        // select instruction.  If not, it will be a load of the original
1665        // global.
1666        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1667          assert(LI->getOperand(0) == GV && "Not a copy!");
1668          // Insert a new load, to preserve the saved value.
1669          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1670                                  LI->getOrdering(), LI->getSynchScope(), LI);
1671        } else {
1672          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1673                 "This is not a form that we understand!");
1674          StoreVal = StoredVal->getOperand(0);
1675          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1676        }
1677      }
1678      new StoreInst(StoreVal, NewGV, false, 0,
1679                    SI->getOrdering(), SI->getSynchScope(), SI);
1680    } else {
1681      // Change the load into a load of bool then a select.
1682      LoadInst *LI = cast<LoadInst>(UI);
1683      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1684                                   LI->getOrdering(), LI->getSynchScope(), LI);
1685      Value *NSI;
1686      if (IsOneZero)
1687        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1688      else
1689        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1690      NSI->takeName(LI);
1691      LI->replaceAllUsesWith(NSI);
1692    }
1693    UI->eraseFromParent();
1694  }
1695
1696  // Retain the name of the old global variable. People who are debugging their
1697  // programs may expect these variables to be named the same.
1698  NewGV->takeName(GV);
1699  GV->eraseFromParent();
1700  return true;
1701}
1702
1703
1704/// ProcessGlobal - Analyze the specified global variable and optimize it if
1705/// possible.  If we make a change, return true.
1706bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
1707                              Module::global_iterator &GVI) {
1708  if (!GV->isDiscardableIfUnused())
1709    return false;
1710
1711  // Do more involved optimizations if the global is internal.
1712  GV->removeDeadConstantUsers();
1713
1714  if (GV->use_empty()) {
1715    DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
1716    GV->eraseFromParent();
1717    ++NumDeleted;
1718    return true;
1719  }
1720
1721  if (!GV->hasLocalLinkage())
1722    return false;
1723
1724  GlobalStatus GS;
1725
1726  if (GlobalStatus::analyzeGlobal(GV, GS))
1727    return false;
1728
1729  if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
1730    GV->setUnnamedAddr(true);
1731    NumUnnamed++;
1732  }
1733
1734  if (GV->isConstant() || !GV->hasInitializer())
1735    return false;
1736
1737  return ProcessInternalGlobal(GV, GVI, GS);
1738}
1739
1740/// ProcessInternalGlobal - Analyze the specified global variable and optimize
1741/// it if possible.  If we make a change, return true.
1742bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
1743                                      Module::global_iterator &GVI,
1744                                      const GlobalStatus &GS) {
1745  // If this is a first class global and has only one accessing function
1746  // and this function is main (which we know is not recursive), we replace
1747  // the global with a local alloca in this function.
1748  //
1749  // NOTE: It doesn't make sense to promote non single-value types since we
1750  // are just replacing static memory to stack memory.
1751  //
1752  // If the global is in different address space, don't bring it to stack.
1753  if (!GS.HasMultipleAccessingFunctions &&
1754      GS.AccessingFunction && !GS.HasNonInstructionUser &&
1755      GV->getType()->getElementType()->isSingleValueType() &&
1756      GS.AccessingFunction->getName() == "main" &&
1757      GS.AccessingFunction->hasExternalLinkage() &&
1758      GV->getType()->getAddressSpace() == 0) {
1759    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
1760    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1761                                                   ->getEntryBlock().begin());
1762    Type *ElemTy = GV->getType()->getElementType();
1763    // FIXME: Pass Global's alignment when globals have alignment
1764    AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
1765    if (!isa<UndefValue>(GV->getInitializer()))
1766      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1767
1768    GV->replaceAllUsesWith(Alloca);
1769    GV->eraseFromParent();
1770    ++NumLocalized;
1771    return true;
1772  }
1773
1774  // If the global is never loaded (but may be stored to), it is dead.
1775  // Delete it now.
1776  if (!GS.IsLoaded) {
1777    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
1778
1779    bool Changed;
1780    if (isLeakCheckerRoot(GV)) {
1781      // Delete any constant stores to the global.
1782      Changed = CleanupPointerRootUsers(GV, TLI);
1783    } else {
1784      // Delete any stores we can find to the global.  We may not be able to
1785      // make it completely dead though.
1786      Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
1787    }
1788
1789    // If the global is dead now, delete it.
1790    if (GV->use_empty()) {
1791      GV->eraseFromParent();
1792      ++NumDeleted;
1793      Changed = true;
1794    }
1795    return Changed;
1796
1797  } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
1798    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1799    GV->setConstant(true);
1800
1801    // Clean up any obviously simplifiable users now.
1802    CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
1803
1804    // If the global is dead now, just nuke it.
1805    if (GV->use_empty()) {
1806      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
1807            << "all users and delete global!\n");
1808      GV->eraseFromParent();
1809      ++NumDeleted;
1810    }
1811
1812    ++NumMarked;
1813    return true;
1814  } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
1815    if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
1816      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
1817        GVI = FirstNewGV;  // Don't skip the newly produced globals!
1818        return true;
1819      }
1820  } else if (GS.StoredType == GlobalStatus::StoredOnce) {
1821    // If the initial value for the global was an undef value, and if only
1822    // one other value was stored into it, we can just change the
1823    // initializer to be the stored value, then delete all stores to the
1824    // global.  This allows us to mark it constant.
1825    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
1826      if (isa<UndefValue>(GV->getInitializer())) {
1827        // Change the initial value here.
1828        GV->setInitializer(SOVConstant);
1829
1830        // Clean up any obviously simplifiable users now.
1831        CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
1832
1833        if (GV->use_empty()) {
1834          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
1835                       << "simplify all users and delete global!\n");
1836          GV->eraseFromParent();
1837          ++NumDeleted;
1838        } else {
1839          GVI = GV;
1840        }
1841        ++NumSubstitute;
1842        return true;
1843      }
1844
1845    // Try to optimize globals based on the knowledge that only one value
1846    // (besides its initializer) is ever stored to the global.
1847    if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
1848                                 TD, TLI))
1849      return true;
1850
1851    // Otherwise, if the global was not a boolean, we can shrink it to be a
1852    // boolean.
1853    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
1854      if (GS.Ordering == NotAtomic) {
1855        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1856          ++NumShrunkToBool;
1857          return true;
1858        }
1859      }
1860    }
1861  }
1862
1863  return false;
1864}
1865
1866/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
1867/// function, changing them to FastCC.
1868static void ChangeCalleesToFastCall(Function *F) {
1869  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
1870    if (isa<BlockAddress>(*UI))
1871      continue;
1872    CallSite User(cast<Instruction>(*UI));
1873    User.setCallingConv(CallingConv::Fast);
1874  }
1875}
1876
1877static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
1878  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
1879    unsigned Index = Attrs.getSlotIndex(i);
1880    if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
1881      continue;
1882
1883    // There can be only one.
1884    return Attrs.removeAttribute(C, Index, Attribute::Nest);
1885  }
1886
1887  return Attrs;
1888}
1889
1890static void RemoveNestAttribute(Function *F) {
1891  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
1892  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
1893    if (isa<BlockAddress>(*UI))
1894      continue;
1895    CallSite User(cast<Instruction>(*UI));
1896    User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
1897  }
1898}
1899
1900bool GlobalOpt::OptimizeFunctions(Module &M) {
1901  bool Changed = false;
1902  // Optimize functions.
1903  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
1904    Function *F = FI++;
1905    // Functions without names cannot be referenced outside this module.
1906    if (!F->hasName() && !F->isDeclaration())
1907      F->setLinkage(GlobalValue::InternalLinkage);
1908    F->removeDeadConstantUsers();
1909    if (F->isDefTriviallyDead()) {
1910      F->eraseFromParent();
1911      Changed = true;
1912      ++NumFnDeleted;
1913    } else if (F->hasLocalLinkage()) {
1914      if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
1915          !F->hasAddressTaken()) {
1916        // If this function has C calling conventions, is not a varargs
1917        // function, and is only called directly, promote it to use the Fast
1918        // calling convention.
1919        F->setCallingConv(CallingConv::Fast);
1920        ChangeCalleesToFastCall(F);
1921        ++NumFastCallFns;
1922        Changed = true;
1923      }
1924
1925      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
1926          !F->hasAddressTaken()) {
1927        // The function is not used by a trampoline intrinsic, so it is safe
1928        // to remove the 'nest' attribute.
1929        RemoveNestAttribute(F);
1930        ++NumNestRemoved;
1931        Changed = true;
1932      }
1933    }
1934  }
1935  return Changed;
1936}
1937
1938bool GlobalOpt::OptimizeGlobalVars(Module &M) {
1939  bool Changed = false;
1940  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
1941       GVI != E; ) {
1942    GlobalVariable *GV = GVI++;
1943    // Global variables without names cannot be referenced outside this module.
1944    if (!GV->hasName() && !GV->isDeclaration())
1945      GV->setLinkage(GlobalValue::InternalLinkage);
1946    // Simplify the initializer.
1947    if (GV->hasInitializer())
1948      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
1949        Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
1950        if (New && New != CE)
1951          GV->setInitializer(New);
1952      }
1953
1954    Changed |= ProcessGlobal(GV, GVI);
1955  }
1956  return Changed;
1957}
1958
1959/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
1960/// initializers have an init priority of 65535.
1961GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
1962  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
1963  if (GV == 0) return 0;
1964
1965  // Verify that the initializer is simple enough for us to handle. We are
1966  // only allowed to optimize the initializer if it is unique.
1967  if (!GV->hasUniqueInitializer()) return 0;
1968
1969  if (isa<ConstantAggregateZero>(GV->getInitializer()))
1970    return GV;
1971  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
1972
1973  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
1974    if (isa<ConstantAggregateZero>(*i))
1975      continue;
1976    ConstantStruct *CS = cast<ConstantStruct>(*i);
1977    if (isa<ConstantPointerNull>(CS->getOperand(1)))
1978      continue;
1979
1980    // Must have a function or null ptr.
1981    if (!isa<Function>(CS->getOperand(1)))
1982      return 0;
1983
1984    // Init priority must be standard.
1985    ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
1986    if (CI->getZExtValue() != 65535)
1987      return 0;
1988  }
1989
1990  return GV;
1991}
1992
1993/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
1994/// return a list of the functions and null terminator as a vector.
1995static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
1996  if (GV->getInitializer()->isNullValue())
1997    return std::vector<Function*>();
1998  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
1999  std::vector<Function*> Result;
2000  Result.reserve(CA->getNumOperands());
2001  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
2002    ConstantStruct *CS = cast<ConstantStruct>(*i);
2003    Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
2004  }
2005  return Result;
2006}
2007
2008/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
2009/// specified array, returning the new global to use.
2010static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
2011                                          const std::vector<Function*> &Ctors) {
2012  // If we made a change, reassemble the initializer list.
2013  Constant *CSVals[2];
2014  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
2015  CSVals[1] = 0;
2016
2017  StructType *StructTy =
2018    cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
2019
2020  // Create the new init list.
2021  std::vector<Constant*> CAList;
2022  for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
2023    if (Ctors[i]) {
2024      CSVals[1] = Ctors[i];
2025    } else {
2026      Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
2027                                          false);
2028      PointerType *PFTy = PointerType::getUnqual(FTy);
2029      CSVals[1] = Constant::getNullValue(PFTy);
2030      CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
2031                                   0x7fffffff);
2032    }
2033    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
2034  }
2035
2036  // Create the array initializer.
2037  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
2038                                                   CAList.size()), CAList);
2039
2040  // If we didn't change the number of elements, don't create a new GV.
2041  if (CA->getType() == GCL->getInitializer()->getType()) {
2042    GCL->setInitializer(CA);
2043    return GCL;
2044  }
2045
2046  // Create the new global and insert it next to the existing list.
2047  GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
2048                                           GCL->getLinkage(), CA, "",
2049                                           GCL->getThreadLocalMode());
2050  GCL->getParent()->getGlobalList().insert(GCL, NGV);
2051  NGV->takeName(GCL);
2052
2053  // Nuke the old list, replacing any uses with the new one.
2054  if (!GCL->use_empty()) {
2055    Constant *V = NGV;
2056    if (V->getType() != GCL->getType())
2057      V = ConstantExpr::getBitCast(V, GCL->getType());
2058    GCL->replaceAllUsesWith(V);
2059  }
2060  GCL->eraseFromParent();
2061
2062  if (Ctors.size())
2063    return NGV;
2064  else
2065    return 0;
2066}
2067
2068
2069static inline bool
2070isSimpleEnoughValueToCommit(Constant *C,
2071                            SmallPtrSet<Constant*, 8> &SimpleConstants,
2072                            const DataLayout *TD);
2073
2074
2075/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
2076/// handled by the code generator.  We don't want to generate something like:
2077///   void *X = &X/42;
2078/// because the code generator doesn't have a relocation that can handle that.
2079///
2080/// This function should be called if C was not found (but just got inserted)
2081/// in SimpleConstants to avoid having to rescan the same constants all the
2082/// time.
2083static bool isSimpleEnoughValueToCommitHelper(Constant *C,
2084                                   SmallPtrSet<Constant*, 8> &SimpleConstants,
2085                                   const DataLayout *TD) {
2086  // Simple integer, undef, constant aggregate zero, global addresses, etc are
2087  // all supported.
2088  if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
2089      isa<GlobalValue>(C))
2090    return true;
2091
2092  // Aggregate values are safe if all their elements are.
2093  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
2094      isa<ConstantVector>(C)) {
2095    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
2096      Constant *Op = cast<Constant>(C->getOperand(i));
2097      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
2098        return false;
2099    }
2100    return true;
2101  }
2102
2103  // We don't know exactly what relocations are allowed in constant expressions,
2104  // so we allow &global+constantoffset, which is safe and uniformly supported
2105  // across targets.
2106  ConstantExpr *CE = cast<ConstantExpr>(C);
2107  switch (CE->getOpcode()) {
2108  case Instruction::BitCast:
2109    // Bitcast is fine if the casted value is fine.
2110    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
2111
2112  case Instruction::IntToPtr:
2113  case Instruction::PtrToInt:
2114    // int <=> ptr is fine if the int type is the same size as the
2115    // pointer type.
2116    if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
2117               TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
2118      return false;
2119    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
2120
2121  // GEP is fine if it is simple + constant offset.
2122  case Instruction::GetElementPtr:
2123    for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
2124      if (!isa<ConstantInt>(CE->getOperand(i)))
2125        return false;
2126    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
2127
2128  case Instruction::Add:
2129    // We allow simple+cst.
2130    if (!isa<ConstantInt>(CE->getOperand(1)))
2131      return false;
2132    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
2133  }
2134  return false;
2135}
2136
2137static inline bool
2138isSimpleEnoughValueToCommit(Constant *C,
2139                            SmallPtrSet<Constant*, 8> &SimpleConstants,
2140                            const DataLayout *TD) {
2141  // If we already checked this constant, we win.
2142  if (!SimpleConstants.insert(C)) return true;
2143  // Check the constant.
2144  return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
2145}
2146
2147
2148/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
2149/// enough for us to understand.  In particular, if it is a cast to anything
2150/// other than from one pointer type to another pointer type, we punt.
2151/// We basically just support direct accesses to globals and GEP's of
2152/// globals.  This should be kept up to date with CommitValueTo.
2153static bool isSimpleEnoughPointerToCommit(Constant *C) {
2154  // Conservatively, avoid aggregate types. This is because we don't
2155  // want to worry about them partially overlapping other stores.
2156  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
2157    return false;
2158
2159  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
2160    // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2161    // external globals.
2162    return GV->hasUniqueInitializer();
2163
2164  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
2165    // Handle a constantexpr gep.
2166    if (CE->getOpcode() == Instruction::GetElementPtr &&
2167        isa<GlobalVariable>(CE->getOperand(0)) &&
2168        cast<GEPOperator>(CE)->isInBounds()) {
2169      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2170      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2171      // external globals.
2172      if (!GV->hasUniqueInitializer())
2173        return false;
2174
2175      // The first index must be zero.
2176      ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
2177      if (!CI || !CI->isZero()) return false;
2178
2179      // The remaining indices must be compile-time known integers within the
2180      // notional bounds of the corresponding static array types.
2181      if (!CE->isGEPWithNoNotionalOverIndexing())
2182        return false;
2183
2184      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2185
2186    // A constantexpr bitcast from a pointer to another pointer is a no-op,
2187    // and we know how to evaluate it by moving the bitcast from the pointer
2188    // operand to the value operand.
2189    } else if (CE->getOpcode() == Instruction::BitCast &&
2190               isa<GlobalVariable>(CE->getOperand(0))) {
2191      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2192      // external globals.
2193      return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
2194    }
2195  }
2196
2197  return false;
2198}
2199
2200/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
2201/// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
2202/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
2203static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2204                                   ConstantExpr *Addr, unsigned OpNo) {
2205  // Base case of the recursion.
2206  if (OpNo == Addr->getNumOperands()) {
2207    assert(Val->getType() == Init->getType() && "Type mismatch!");
2208    return Val;
2209  }
2210
2211  SmallVector<Constant*, 32> Elts;
2212  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2213    // Break up the constant into its elements.
2214    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2215      Elts.push_back(Init->getAggregateElement(i));
2216
2217    // Replace the element that we are supposed to.
2218    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2219    unsigned Idx = CU->getZExtValue();
2220    assert(Idx < STy->getNumElements() && "Struct index out of range!");
2221    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2222
2223    // Return the modified struct.
2224    return ConstantStruct::get(STy, Elts);
2225  }
2226
2227  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2228  SequentialType *InitTy = cast<SequentialType>(Init->getType());
2229
2230  uint64_t NumElts;
2231  if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
2232    NumElts = ATy->getNumElements();
2233  else
2234    NumElts = InitTy->getVectorNumElements();
2235
2236  // Break up the array into elements.
2237  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2238    Elts.push_back(Init->getAggregateElement(i));
2239
2240  assert(CI->getZExtValue() < NumElts);
2241  Elts[CI->getZExtValue()] =
2242    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2243
2244  if (Init->getType()->isArrayTy())
2245    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2246  return ConstantVector::get(Elts);
2247}
2248
2249/// CommitValueTo - We have decided that Addr (which satisfies the predicate
2250/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
2251static void CommitValueTo(Constant *Val, Constant *Addr) {
2252  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2253    assert(GV->hasInitializer());
2254    GV->setInitializer(Val);
2255    return;
2256  }
2257
2258  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2259  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2260  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2261}
2262
2263namespace {
2264
2265/// Evaluator - This class evaluates LLVM IR, producing the Constant
2266/// representing each SSA instruction.  Changes to global variables are stored
2267/// in a mapping that can be iterated over after the evaluation is complete.
2268/// Once an evaluation call fails, the evaluation object should not be reused.
2269class Evaluator {
2270public:
2271  Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
2272    : TD(TD), TLI(TLI) {
2273    ValueStack.push_back(new DenseMap<Value*, Constant*>);
2274  }
2275
2276  ~Evaluator() {
2277    DeleteContainerPointers(ValueStack);
2278    while (!AllocaTmps.empty()) {
2279      GlobalVariable *Tmp = AllocaTmps.back();
2280      AllocaTmps.pop_back();
2281
2282      // If there are still users of the alloca, the program is doing something
2283      // silly, e.g. storing the address of the alloca somewhere and using it
2284      // later.  Since this is undefined, we'll just make it be null.
2285      if (!Tmp->use_empty())
2286        Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
2287      delete Tmp;
2288    }
2289  }
2290
2291  /// EvaluateFunction - Evaluate a call to function F, returning true if
2292  /// successful, false if we can't evaluate it.  ActualArgs contains the formal
2293  /// arguments for the function.
2294  bool EvaluateFunction(Function *F, Constant *&RetVal,
2295                        const SmallVectorImpl<Constant*> &ActualArgs);
2296
2297  /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
2298  /// successful, false if we can't evaluate it.  NewBB returns the next BB that
2299  /// control flows into, or null upon return.
2300  bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
2301
2302  Constant *getVal(Value *V) {
2303    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
2304    Constant *R = ValueStack.back()->lookup(V);
2305    assert(R && "Reference to an uncomputed value!");
2306    return R;
2307  }
2308
2309  void setVal(Value *V, Constant *C) {
2310    ValueStack.back()->operator[](V) = C;
2311  }
2312
2313  const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
2314    return MutatedMemory;
2315  }
2316
2317  const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
2318    return Invariants;
2319  }
2320
2321private:
2322  Constant *ComputeLoadResult(Constant *P);
2323
2324  /// ValueStack - As we compute SSA register values, we store their contents
2325  /// here. The back of the vector contains the current function and the stack
2326  /// contains the values in the calling frames.
2327  SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
2328
2329  /// CallStack - This is used to detect recursion.  In pathological situations
2330  /// we could hit exponential behavior, but at least there is nothing
2331  /// unbounded.
2332  SmallVector<Function*, 4> CallStack;
2333
2334  /// MutatedMemory - For each store we execute, we update this map.  Loads
2335  /// check this to get the most up-to-date value.  If evaluation is successful,
2336  /// this state is committed to the process.
2337  DenseMap<Constant*, Constant*> MutatedMemory;
2338
2339  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
2340  /// to represent its body.  This vector is needed so we can delete the
2341  /// temporary globals when we are done.
2342  SmallVector<GlobalVariable*, 32> AllocaTmps;
2343
2344  /// Invariants - These global variables have been marked invariant by the
2345  /// static constructor.
2346  SmallPtrSet<GlobalVariable*, 8> Invariants;
2347
2348  /// SimpleConstants - These are constants we have checked and know to be
2349  /// simple enough to live in a static initializer of a global.
2350  SmallPtrSet<Constant*, 8> SimpleConstants;
2351
2352  const DataLayout *TD;
2353  const TargetLibraryInfo *TLI;
2354};
2355
2356}  // anonymous namespace
2357
2358/// ComputeLoadResult - Return the value that would be computed by a load from
2359/// P after the stores reflected by 'memory' have been performed.  If we can't
2360/// decide, return null.
2361Constant *Evaluator::ComputeLoadResult(Constant *P) {
2362  // If this memory location has been recently stored, use the stored value: it
2363  // is the most up-to-date.
2364  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
2365  if (I != MutatedMemory.end()) return I->second;
2366
2367  // Access it.
2368  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
2369    if (GV->hasDefinitiveInitializer())
2370      return GV->getInitializer();
2371    return 0;
2372  }
2373
2374  // Handle a constantexpr getelementptr.
2375  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
2376    if (CE->getOpcode() == Instruction::GetElementPtr &&
2377        isa<GlobalVariable>(CE->getOperand(0))) {
2378      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2379      if (GV->hasDefinitiveInitializer())
2380        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2381    }
2382
2383  return 0;  // don't know how to evaluate.
2384}
2385
2386/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
2387/// successful, false if we can't evaluate it.  NewBB returns the next BB that
2388/// control flows into, or null upon return.
2389bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
2390                              BasicBlock *&NextBB) {
2391  // This is the main evaluation loop.
2392  while (1) {
2393    Constant *InstResult = 0;
2394
2395    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
2396
2397    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
2398      if (!SI->isSimple()) {
2399        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
2400        return false;  // no volatile/atomic accesses.
2401      }
2402      Constant *Ptr = getVal(SI->getOperand(1));
2403      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2404        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
2405        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
2406        DEBUG(dbgs() << "; To: " << *Ptr << "\n");
2407      }
2408      if (!isSimpleEnoughPointerToCommit(Ptr)) {
2409        // If this is too complex for us to commit, reject it.
2410        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
2411        return false;
2412      }
2413
2414      Constant *Val = getVal(SI->getOperand(0));
2415
2416      // If this might be too difficult for the backend to handle (e.g. the addr
2417      // of one global variable divided by another) then we can't commit it.
2418      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
2419        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
2420              << "\n");
2421        return false;
2422      }
2423
2424      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2425        if (CE->getOpcode() == Instruction::BitCast) {
2426          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
2427          // If we're evaluating a store through a bitcast, then we need
2428          // to pull the bitcast off the pointer type and push it onto the
2429          // stored value.
2430          Ptr = CE->getOperand(0);
2431
2432          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
2433
2434          // In order to push the bitcast onto the stored value, a bitcast
2435          // from NewTy to Val's type must be legal.  If it's not, we can try
2436          // introspecting NewTy to find a legal conversion.
2437          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
2438            // If NewTy is a struct, we can convert the pointer to the struct
2439            // into a pointer to its first member.
2440            // FIXME: This could be extended to support arrays as well.
2441            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
2442              NewTy = STy->getTypeAtIndex(0U);
2443
2444              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
2445              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
2446              Constant * const IdxList[] = {IdxZero, IdxZero};
2447
2448              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
2449              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
2450                Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
2451
2452            // If we can't improve the situation by introspecting NewTy,
2453            // we have to give up.
2454            } else {
2455              DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
2456                    "evaluate.\n");
2457              return false;
2458            }
2459          }
2460
2461          // If we found compatible types, go ahead and push the bitcast
2462          // onto the stored value.
2463          Val = ConstantExpr::getBitCast(Val, NewTy);
2464
2465          DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
2466        }
2467      }
2468
2469      MutatedMemory[Ptr] = Val;
2470    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
2471      InstResult = ConstantExpr::get(BO->getOpcode(),
2472                                     getVal(BO->getOperand(0)),
2473                                     getVal(BO->getOperand(1)));
2474      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
2475            << "\n");
2476    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
2477      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
2478                                            getVal(CI->getOperand(0)),
2479                                            getVal(CI->getOperand(1)));
2480      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
2481            << "\n");
2482    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
2483      InstResult = ConstantExpr::getCast(CI->getOpcode(),
2484                                         getVal(CI->getOperand(0)),
2485                                         CI->getType());
2486      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
2487            << "\n");
2488    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
2489      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
2490                                           getVal(SI->getOperand(1)),
2491                                           getVal(SI->getOperand(2)));
2492      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
2493            << "\n");
2494    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
2495      Constant *P = getVal(GEP->getOperand(0));
2496      SmallVector<Constant*, 8> GEPOps;
2497      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
2498           i != e; ++i)
2499        GEPOps.push_back(getVal(*i));
2500      InstResult =
2501        ConstantExpr::getGetElementPtr(P, GEPOps,
2502                                       cast<GEPOperator>(GEP)->isInBounds());
2503      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
2504            << "\n");
2505    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
2506
2507      if (!LI->isSimple()) {
2508        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
2509        return false;  // no volatile/atomic accesses.
2510      }
2511
2512      Constant *Ptr = getVal(LI->getOperand(0));
2513      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2514        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
2515        DEBUG(dbgs() << "Found a constant pointer expression, constant "
2516              "folding: " << *Ptr << "\n");
2517      }
2518      InstResult = ComputeLoadResult(Ptr);
2519      if (InstResult == 0) {
2520        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
2521              "\n");
2522        return false; // Could not evaluate load.
2523      }
2524
2525      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
2526    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
2527      if (AI->isArrayAllocation()) {
2528        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
2529        return false;  // Cannot handle array allocs.
2530      }
2531      Type *Ty = AI->getType()->getElementType();
2532      AllocaTmps.push_back(new GlobalVariable(Ty, false,
2533                                              GlobalValue::InternalLinkage,
2534                                              UndefValue::get(Ty),
2535                                              AI->getName()));
2536      InstResult = AllocaTmps.back();
2537      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
2538    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
2539      CallSite CS(CurInst);
2540
2541      // Debug info can safely be ignored here.
2542      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
2543        DEBUG(dbgs() << "Ignoring debug info.\n");
2544        ++CurInst;
2545        continue;
2546      }
2547
2548      // Cannot handle inline asm.
2549      if (isa<InlineAsm>(CS.getCalledValue())) {
2550        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
2551        return false;
2552      }
2553
2554      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
2555        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
2556          if (MSI->isVolatile()) {
2557            DEBUG(dbgs() << "Can not optimize a volatile memset " <<
2558                  "intrinsic.\n");
2559            return false;
2560          }
2561          Constant *Ptr = getVal(MSI->getDest());
2562          Constant *Val = getVal(MSI->getValue());
2563          Constant *DestVal = ComputeLoadResult(getVal(Ptr));
2564          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
2565            // This memset is a no-op.
2566            DEBUG(dbgs() << "Ignoring no-op memset.\n");
2567            ++CurInst;
2568            continue;
2569          }
2570        }
2571
2572        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
2573            II->getIntrinsicID() == Intrinsic::lifetime_end) {
2574          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
2575          ++CurInst;
2576          continue;
2577        }
2578
2579        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
2580          // We don't insert an entry into Values, as it doesn't have a
2581          // meaningful return value.
2582          if (!II->use_empty()) {
2583            DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
2584            return false;
2585          }
2586          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
2587          Value *PtrArg = getVal(II->getArgOperand(1));
2588          Value *Ptr = PtrArg->stripPointerCasts();
2589          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
2590            Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
2591            if (TD && !Size->isAllOnesValue() &&
2592                Size->getValue().getLimitedValue() >=
2593                TD->getTypeStoreSize(ElemTy)) {
2594              Invariants.insert(GV);
2595              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
2596                    << "\n");
2597            } else {
2598              DEBUG(dbgs() << "Found a global var, but can not treat it as an "
2599                    "invariant.\n");
2600            }
2601          }
2602          // Continue even if we do nothing.
2603          ++CurInst;
2604          continue;
2605        }
2606
2607        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
2608        return false;
2609      }
2610
2611      // Resolve function pointers.
2612      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
2613      if (!Callee || Callee->mayBeOverridden()) {
2614        DEBUG(dbgs() << "Can not resolve function pointer.\n");
2615        return false;  // Cannot resolve.
2616      }
2617
2618      SmallVector<Constant*, 8> Formals;
2619      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
2620        Formals.push_back(getVal(*i));
2621
2622      if (Callee->isDeclaration()) {
2623        // If this is a function we can constant fold, do it.
2624        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
2625          InstResult = C;
2626          DEBUG(dbgs() << "Constant folded function call. Result: " <<
2627                *InstResult << "\n");
2628        } else {
2629          DEBUG(dbgs() << "Can not constant fold function call.\n");
2630          return false;
2631        }
2632      } else {
2633        if (Callee->getFunctionType()->isVarArg()) {
2634          DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
2635          return false;
2636        }
2637
2638        Constant *RetVal = 0;
2639        // Execute the call, if successful, use the return value.
2640        ValueStack.push_back(new DenseMap<Value*, Constant*>);
2641        if (!EvaluateFunction(Callee, RetVal, Formals)) {
2642          DEBUG(dbgs() << "Failed to evaluate function.\n");
2643          return false;
2644        }
2645        delete ValueStack.pop_back_val();
2646        InstResult = RetVal;
2647
2648        if (InstResult != NULL) {
2649          DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
2650                InstResult << "\n\n");
2651        } else {
2652          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
2653        }
2654      }
2655    } else if (isa<TerminatorInst>(CurInst)) {
2656      DEBUG(dbgs() << "Found a terminator instruction.\n");
2657
2658      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
2659        if (BI->isUnconditional()) {
2660          NextBB = BI->getSuccessor(0);
2661        } else {
2662          ConstantInt *Cond =
2663            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
2664          if (!Cond) return false;  // Cannot determine.
2665
2666          NextBB = BI->getSuccessor(!Cond->getZExtValue());
2667        }
2668      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
2669        ConstantInt *Val =
2670          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
2671        if (!Val) return false;  // Cannot determine.
2672        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
2673      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
2674        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
2675        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
2676          NextBB = BA->getBasicBlock();
2677        else
2678          return false;  // Cannot determine.
2679      } else if (isa<ReturnInst>(CurInst)) {
2680        NextBB = 0;
2681      } else {
2682        // invoke, unwind, resume, unreachable.
2683        DEBUG(dbgs() << "Can not handle terminator.");
2684        return false;  // Cannot handle this terminator.
2685      }
2686
2687      // We succeeded at evaluating this block!
2688      DEBUG(dbgs() << "Successfully evaluated block.\n");
2689      return true;
2690    } else {
2691      // Did not know how to evaluate this!
2692      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
2693            "\n");
2694      return false;
2695    }
2696
2697    if (!CurInst->use_empty()) {
2698      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
2699        InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
2700
2701      setVal(CurInst, InstResult);
2702    }
2703
2704    // If we just processed an invoke, we finished evaluating the block.
2705    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
2706      NextBB = II->getNormalDest();
2707      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
2708      return true;
2709    }
2710
2711    // Advance program counter.
2712    ++CurInst;
2713  }
2714}
2715
2716/// EvaluateFunction - Evaluate a call to function F, returning true if
2717/// successful, false if we can't evaluate it.  ActualArgs contains the formal
2718/// arguments for the function.
2719bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
2720                                 const SmallVectorImpl<Constant*> &ActualArgs) {
2721  // Check to see if this function is already executing (recursion).  If so,
2722  // bail out.  TODO: we might want to accept limited recursion.
2723  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
2724    return false;
2725
2726  CallStack.push_back(F);
2727
2728  // Initialize arguments to the incoming values specified.
2729  unsigned ArgNo = 0;
2730  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
2731       ++AI, ++ArgNo)
2732    setVal(AI, ActualArgs[ArgNo]);
2733
2734  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
2735  // we can only evaluate any one basic block at most once.  This set keeps
2736  // track of what we have executed so we can detect recursive cases etc.
2737  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
2738
2739  // CurBB - The current basic block we're evaluating.
2740  BasicBlock *CurBB = F->begin();
2741
2742  BasicBlock::iterator CurInst = CurBB->begin();
2743
2744  while (1) {
2745    BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
2746    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
2747
2748    if (!EvaluateBlock(CurInst, NextBB))
2749      return false;
2750
2751    if (NextBB == 0) {
2752      // Successfully running until there's no next block means that we found
2753      // the return.  Fill it the return value and pop the call stack.
2754      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
2755      if (RI->getNumOperands())
2756        RetVal = getVal(RI->getOperand(0));
2757      CallStack.pop_back();
2758      return true;
2759    }
2760
2761    // Okay, we succeeded in evaluating this control flow.  See if we have
2762    // executed the new block before.  If so, we have a looping function,
2763    // which we cannot evaluate in reasonable time.
2764    if (!ExecutedBlocks.insert(NextBB))
2765      return false;  // looped!
2766
2767    // Okay, we have never been in this block before.  Check to see if there
2768    // are any PHI nodes.  If so, evaluate them with information about where
2769    // we came from.
2770    PHINode *PN = 0;
2771    for (CurInst = NextBB->begin();
2772         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
2773      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
2774
2775    // Advance to the next block.
2776    CurBB = NextBB;
2777  }
2778}
2779
2780/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
2781/// we can.  Return true if we can, false otherwise.
2782static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
2783                                      const TargetLibraryInfo *TLI) {
2784  // Call the function.
2785  Evaluator Eval(TD, TLI);
2786  Constant *RetValDummy;
2787  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2788                                           SmallVector<Constant*, 0>());
2789
2790  if (EvalSuccess) {
2791    // We succeeded at evaluation: commit the result.
2792    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2793          << F->getName() << "' to " << Eval.getMutatedMemory().size()
2794          << " stores.\n");
2795    for (DenseMap<Constant*, Constant*>::const_iterator I =
2796           Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
2797         I != E; ++I)
2798      CommitValueTo(I->second, I->first);
2799    for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
2800           Eval.getInvariants().begin(), E = Eval.getInvariants().end();
2801         I != E; ++I)
2802      (*I)->setConstant(true);
2803  }
2804
2805  return EvalSuccess;
2806}
2807
2808/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
2809/// Return true if anything changed.
2810bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
2811  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
2812  bool MadeChange = false;
2813  if (Ctors.empty()) return false;
2814
2815  // Loop over global ctors, optimizing them when we can.
2816  for (unsigned i = 0; i != Ctors.size(); ++i) {
2817    Function *F = Ctors[i];
2818    // Found a null terminator in the middle of the list, prune off the rest of
2819    // the list.
2820    if (F == 0) {
2821      if (i != Ctors.size()-1) {
2822        Ctors.resize(i+1);
2823        MadeChange = true;
2824      }
2825      break;
2826    }
2827    DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
2828
2829    // We cannot simplify external ctor functions.
2830    if (F->empty()) continue;
2831
2832    // If we can evaluate the ctor at compile time, do.
2833    if (EvaluateStaticConstructor(F, TD, TLI)) {
2834      Ctors.erase(Ctors.begin()+i);
2835      MadeChange = true;
2836      --i;
2837      ++NumCtorsEvaluated;
2838      continue;
2839    }
2840  }
2841
2842  if (!MadeChange) return false;
2843
2844  GCL = InstallGlobalCtors(GCL, Ctors);
2845  return true;
2846}
2847
2848static int compareNames(Constant *const *A, Constant *const *B) {
2849  return (*A)->getName().compare((*B)->getName());
2850}
2851
2852static void setUsedInitializer(GlobalVariable &V,
2853                               SmallPtrSet<GlobalValue *, 8> Init) {
2854  if (Init.empty()) {
2855    V.eraseFromParent();
2856    return;
2857  }
2858
2859  SmallVector<llvm::Constant *, 8> UsedArray;
2860  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
2861
2862  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
2863       I != E; ++I) {
2864    Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
2865    UsedArray.push_back(Cast);
2866  }
2867  // Sort to get deterministic order.
2868  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2869  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2870
2871  Module *M = V.getParent();
2872  V.removeFromParent();
2873  GlobalVariable *NV =
2874      new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
2875                         llvm::ConstantArray::get(ATy, UsedArray), "");
2876  NV->takeName(&V);
2877  NV->setSection("llvm.metadata");
2878  delete &V;
2879}
2880
2881namespace {
2882/// \brief An easy to access representation of llvm.used and llvm.compiler.used.
2883class LLVMUsed {
2884  SmallPtrSet<GlobalValue *, 8> Used;
2885  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
2886  GlobalVariable *UsedV;
2887  GlobalVariable *CompilerUsedV;
2888
2889public:
2890  LLVMUsed(Module &M) {
2891    UsedV = collectUsedGlobalVariables(M, Used, false);
2892    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
2893  }
2894  typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
2895  iterator usedBegin() { return Used.begin(); }
2896  iterator usedEnd() { return Used.end(); }
2897  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2898  iterator compilerUsedEnd() { return CompilerUsed.end(); }
2899  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2900  bool compilerUsedCount(GlobalValue *GV) const {
2901    return CompilerUsed.count(GV);
2902  }
2903  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2904  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2905  bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
2906  bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
2907
2908  void syncVariablesAndSets() {
2909    if (UsedV)
2910      setUsedInitializer(*UsedV, Used);
2911    if (CompilerUsedV)
2912      setUsedInitializer(*CompilerUsedV, CompilerUsed);
2913  }
2914};
2915}
2916
2917static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2918  if (GA.use_empty()) // No use at all.
2919    return false;
2920
2921  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2922         "We should have removed the duplicated "
2923         "element from llvm.compiler.used");
2924  if (!GA.hasOneUse())
2925    // Strictly more than one use. So at least one is not in llvm.used and
2926    // llvm.compiler.used.
2927    return true;
2928
2929  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2930  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2931}
2932
2933static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2934                                               const LLVMUsed &U) {
2935  unsigned N = 2;
2936  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2937         "We should have removed the duplicated "
2938         "element from llvm.compiler.used");
2939  if (U.usedCount(&V) || U.compilerUsedCount(&V))
2940    ++N;
2941  return V.hasNUsesOrMore(N);
2942}
2943
2944static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2945  if (!GA.hasLocalLinkage())
2946    return true;
2947
2948  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2949}
2950
2951static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
2952  RenameTarget = false;
2953  bool Ret = false;
2954  if (hasUseOtherThanLLVMUsed(GA, U))
2955    Ret = true;
2956
2957  // If the alias is externally visible, we may still be able to simplify it.
2958  if (!mayHaveOtherReferences(GA, U))
2959    return Ret;
2960
2961  // If the aliasee has internal linkage, give it the name and linkage
2962  // of the alias, and delete the alias.  This turns:
2963  //   define internal ... @f(...)
2964  //   @a = alias ... @f
2965  // into:
2966  //   define ... @a(...)
2967  Constant *Aliasee = GA.getAliasee();
2968  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2969  if (!Target->hasLocalLinkage())
2970    return Ret;
2971
2972  // Do not perform the transform if multiple aliases potentially target the
2973  // aliasee. This check also ensures that it is safe to replace the section
2974  // and other attributes of the aliasee with those of the alias.
2975  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2976    return Ret;
2977
2978  RenameTarget = true;
2979  return true;
2980}
2981
2982bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
2983  bool Changed = false;
2984  LLVMUsed Used(M);
2985
2986  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
2987                                               E = Used.usedEnd();
2988       I != E; ++I)
2989    Used.compilerUsedErase(*I);
2990
2991  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2992       I != E;) {
2993    Module::alias_iterator J = I++;
2994    // Aliases without names cannot be referenced outside this module.
2995    if (!J->hasName() && !J->isDeclaration())
2996      J->setLinkage(GlobalValue::InternalLinkage);
2997    // If the aliasee may change at link time, nothing can be done - bail out.
2998    if (J->mayBeOverridden())
2999      continue;
3000
3001    Constant *Aliasee = J->getAliasee();
3002    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
3003    Target->removeDeadConstantUsers();
3004
3005    // Make all users of the alias use the aliasee instead.
3006    bool RenameTarget;
3007    if (!hasUsesToReplace(*J, Used, RenameTarget))
3008      continue;
3009
3010    J->replaceAllUsesWith(Aliasee);
3011    ++NumAliasesResolved;
3012    Changed = true;
3013
3014    if (RenameTarget) {
3015      // Give the aliasee the name, linkage and other attributes of the alias.
3016      Target->takeName(J);
3017      Target->setLinkage(J->getLinkage());
3018      Target->GlobalValue::copyAttributesFrom(J);
3019
3020      if (Used.usedErase(J))
3021        Used.usedInsert(Target);
3022
3023      if (Used.compilerUsedErase(J))
3024        Used.compilerUsedInsert(Target);
3025    } else if (mayHaveOtherReferences(*J, Used))
3026      continue;
3027
3028    // Delete the alias.
3029    M.getAliasList().erase(J);
3030    ++NumAliasesRemoved;
3031    Changed = true;
3032  }
3033
3034  Used.syncVariablesAndSets();
3035
3036  return Changed;
3037}
3038
3039static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
3040  if (!TLI->has(LibFunc::cxa_atexit))
3041    return 0;
3042
3043  Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
3044
3045  if (!Fn)
3046    return 0;
3047
3048  FunctionType *FTy = Fn->getFunctionType();
3049
3050  // Checking that the function has the right return type, the right number of
3051  // parameters and that they all have pointer types should be enough.
3052  if (!FTy->getReturnType()->isIntegerTy() ||
3053      FTy->getNumParams() != 3 ||
3054      !FTy->getParamType(0)->isPointerTy() ||
3055      !FTy->getParamType(1)->isPointerTy() ||
3056      !FTy->getParamType(2)->isPointerTy())
3057    return 0;
3058
3059  return Fn;
3060}
3061
3062/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
3063/// destructor and can therefore be eliminated.
3064/// Note that we assume that other optimization passes have already simplified
3065/// the code so we only look for a function with a single basic block, where
3066/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
3067/// other side-effect free instructions.
3068static bool cxxDtorIsEmpty(const Function &Fn,
3069                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
3070  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
3071  // nounwind, but that doesn't seem worth doing.
3072  if (Fn.isDeclaration())
3073    return false;
3074
3075  if (++Fn.begin() != Fn.end())
3076    return false;
3077
3078  const BasicBlock &EntryBlock = Fn.getEntryBlock();
3079  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
3080       I != E; ++I) {
3081    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
3082      // Ignore debug intrinsics.
3083      if (isa<DbgInfoIntrinsic>(CI))
3084        continue;
3085
3086      const Function *CalledFn = CI->getCalledFunction();
3087
3088      if (!CalledFn)
3089        return false;
3090
3091      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
3092
3093      // Don't treat recursive functions as empty.
3094      if (!NewCalledFunctions.insert(CalledFn))
3095        return false;
3096
3097      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
3098        return false;
3099    } else if (isa<ReturnInst>(*I))
3100      return true; // We're done.
3101    else if (I->mayHaveSideEffects())
3102      return false; // Destructor with side effects, bail.
3103  }
3104
3105  return false;
3106}
3107
3108bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
3109  /// Itanium C++ ABI p3.3.5:
3110  ///
3111  ///   After constructing a global (or local static) object, that will require
3112  ///   destruction on exit, a termination function is registered as follows:
3113  ///
3114  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
3115  ///
3116  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
3117  ///   call f(p) when DSO d is unloaded, before all such termination calls
3118  ///   registered before this one. It returns zero if registration is
3119  ///   successful, nonzero on failure.
3120
3121  // This pass will look for calls to __cxa_atexit where the function is trivial
3122  // and remove them.
3123  bool Changed = false;
3124
3125  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
3126       E = CXAAtExitFn->use_end(); I != E;) {
3127    // We're only interested in calls. Theoretically, we could handle invoke
3128    // instructions as well, but neither llvm-gcc nor clang generate invokes
3129    // to __cxa_atexit.
3130    CallInst *CI = dyn_cast<CallInst>(*I++);
3131    if (!CI)
3132      continue;
3133
3134    Function *DtorFn =
3135      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
3136    if (!DtorFn)
3137      continue;
3138
3139    SmallPtrSet<const Function *, 8> CalledFunctions;
3140    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
3141      continue;
3142
3143    // Just remove the call.
3144    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
3145    CI->eraseFromParent();
3146
3147    ++NumCXXDtorsRemoved;
3148
3149    Changed |= true;
3150  }
3151
3152  return Changed;
3153}
3154
3155bool GlobalOpt::runOnModule(Module &M) {
3156  bool Changed = false;
3157
3158  TD = getAnalysisIfAvailable<DataLayout>();
3159  TLI = &getAnalysis<TargetLibraryInfo>();
3160
3161  // Try to find the llvm.globalctors list.
3162  GlobalVariable *GlobalCtors = FindGlobalCtors(M);
3163
3164  bool LocalChange = true;
3165  while (LocalChange) {
3166    LocalChange = false;
3167
3168    // Delete functions that are trivially dead, ccc -> fastcc
3169    LocalChange |= OptimizeFunctions(M);
3170
3171    // Optimize global_ctors list.
3172    if (GlobalCtors)
3173      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
3174
3175    // Optimize non-address-taken globals.
3176    LocalChange |= OptimizeGlobalVars(M);
3177
3178    // Resolve aliases, when possible.
3179    LocalChange |= OptimizeGlobalAliases(M);
3180
3181    // Try to remove trivial global destructors if they are not removed
3182    // already.
3183    Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
3184    if (CXAAtExitFn)
3185      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
3186
3187    Changed |= LocalChange;
3188  }
3189
3190  // TODO: Move all global ctors functions to the end of the module for code
3191  // layout.
3192
3193  return Changed;
3194}
3195