// IndVarSimplify.cpp revision 263508
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;

// Pass-wide statistics, reported with -stats.
STATISTIC(NumWidened  , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR     , "Number of loop exit tests replaced");
STATISTIC(NumElimExt  , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV   , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
62313571Smmstatic cl::opt<bool> VerifyIndvars( 63231200Smm "verify-indvars", cl::Hidden, 64231200Smm cl::desc("Verify the ScalarEvolution result after running indvars")); 65313571Smm 66313571Smmnamespace { 67313571Smm class IndVarSimplify : public LoopPass { 68313571Smm LoopInfo *LI; 69231200Smm ScalarEvolution *SE; 70231200Smm DominatorTree *DT; 71313571Smm DataLayout *TD; 72313571Smm TargetLibraryInfo *TLI; 73231200Smm 74231200Smm SmallVector<WeakVH, 16> DeadInsts; 75231200Smm bool Changed; 76313571Smm public: 77313571Smm 78313571Smm static char ID; // Pass identification, replacement for typeid 79313571Smm IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0), 80231200Smm Changed(false) { 81231200Smm initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); 82313571Smm } 83313571Smm 84231200Smm virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 85231200Smm 86313927Smm virtual void getAnalysisUsage(AnalysisUsage &AU) const { 87313927Smm AU.addRequired<DominatorTree>(); 88313927Smm AU.addRequired<LoopInfo>(); 89313927Smm AU.addRequired<ScalarEvolution>(); 90313927Smm AU.addRequiredID(LoopSimplifyID); 91313927Smm AU.addRequiredID(LCSSAID); 92313927Smm AU.addPreserved<ScalarEvolution>(); 93313927Smm AU.addPreservedID(LoopSimplifyID); 94313927Smm AU.addPreservedID(LCSSAID); 95313927Smm AU.setPreservesCFG(); 96313927Smm } 97313927Smm 98313927Smm private: 99313927Smm virtual void releaseMemory() { 100313927Smm DeadInsts.clear(); 101313927Smm } 102313927Smm 103313927Smm bool isValidRewrite(Value *FromVal, Value *ToVal); 104313927Smm 105313927Smm void HandleFloatingPointIV(Loop *L, PHINode *PH); 106313927Smm void RewriteNonIntegerIVs(Loop *L); 107313927Smm 108313927Smm void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); 109313927Smm 110313927Smm void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 111313927Smm 112313927Smm Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, 113313927Smm PHINode *IndVar, 
SCEVExpander &Rewriter); 114313927Smm 115313927Smm void SinkUnusedInvariants(Loop *L); 116313927Smm }; 117313927Smm} 118313927Smm 119313927Smmchar IndVarSimplify::ID = 0; 120313927SmmINITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", 121313927Smm "Induction Variable Simplification", false, false) 122313927SmmINITIALIZE_PASS_DEPENDENCY(DominatorTree) 123313927SmmINITIALIZE_PASS_DEPENDENCY(LoopInfo) 124313927SmmINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 125313927SmmINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 126313927SmmINITIALIZE_PASS_DEPENDENCY(LCSSA) 127313927SmmINITIALIZE_PASS_END(IndVarSimplify, "indvars", 128313927Smm "Induction Variable Simplification", false, false) 129313927Smm 130231200SmmPass *llvm::createIndVarSimplifyPass() { 131231200Smm return new IndVarSimplify(); 132231200Smm} 133231200Smm 134231200Smm/// isValidRewrite - Return true if the SCEV expansion generated by the 135231200Smm/// rewriter can replace the original value. SCEV guarantees that it 136231200Smm/// produces the same value, but the way it is produced may be illegal IR. 137231200Smm/// Ideally, this function will only be called for verification. 138231200Smmbool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { 139231200Smm // If an SCEV expression subsumed multiple pointers, its expansion could 140231200Smm // reassociate the GEP changing the base pointer. This is illegal because the 141344674Smm // final address produced by a GEP chain must be inbounds relative to its 142344674Smm // underlying object. Otherwise basic alias analysis, among other things, 143344674Smm // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 144344674Smm // producing an expression involving multiple pointers. Until then, we must 145231200Smm // bail out here. 146311042Smm // 147231200Smm // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject 148231200Smm // because it understands lcssa phis while SCEV does not. 
149231200Smm Value *FromPtr = FromVal; 150231200Smm Value *ToPtr = ToVal; 151231200Smm if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { 152231200Smm FromPtr = GEP->getPointerOperand(); 153231200Smm } 154231200Smm if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { 155231200Smm ToPtr = GEP->getPointerOperand(); 156231200Smm } 157231200Smm if (FromPtr != FromVal || ToPtr != ToVal) { 158231200Smm // Quickly check the common case 159231200Smm if (FromPtr == ToPtr) 160231200Smm return true; 161231200Smm 162231200Smm // SCEV may have rewritten an expression that produces the GEP's pointer 163231200Smm // operand. That's ok as long as the pointer operand has the same base 164231200Smm // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the 165231200Smm // base of a recurrence. This handles the case in which SCEV expansion 166231200Smm // converts a pointer type recurrence into a nonrecurrent pointer base 167231200Smm // indexed by an integer recurrence. 168231200Smm 169231200Smm // If the GEP base pointer is a vector of pointers, abort. 170231200Smm if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) 171231200Smm return false; 172231200Smm 173231200Smm const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 174231200Smm const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 175231200Smm if (FromBase == ToBase) 176231200Smm return true; 177231200Smm 178231200Smm DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " 179231200Smm << *FromBase << " != " << *ToBase << "\n"); 180231200Smm 181231200Smm return false; 182231200Smm } 183231200Smm return true; 184231200Smm} 185231200Smm 186231200Smm/// Determine the insertion point for this user. By default, insert immediately 187231200Smm/// before the user. SCEVExpander or LICM will hoist loop invariants out of the 188231200Smm/// loop. For PHI nodes, there may be multiple uses, so compute the nearest 189231200Smm/// common dominator for the incoming blocks. 
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT) {
  // Non-PHI users: insert directly before the user instruction.
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = 0;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    // A PHI "uses" Def at the end of the corresponding incoming block, so the
    // candidate insertion point is that block's terminator.
    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    // Multiple incoming edges carry Def: walk up to the nearest common
    // dominator so a single insertion point covers them all.
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");
  assert((!isa<Instruction>(Def) ||
          DT->dominates(cast<Instruction>(Def), InsertPt)) &&
         "def does not dominate all uses");
  return InsertPt;
}

//===----------------------------------------------------------------------===//
// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// ConvertToSInt - Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  // Only accept conversions that are exact (no fractional part, no rounding).
  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
                           &isExact) != APFloat::opOK || !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// HandleFloatingPointIV - If the loop has floating induction variable
/// then insert corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  // L->contains() yields 0 or 1: if incoming block 0 is inside the loop it is
  // the backedge, so the preheader ("incoming") edge is index 1, else index 0.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValueVal =
    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator. (Exactly two users are required.)
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->use_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->use_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (Compare == 0 || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->use_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;


  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == 0 ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison. Ordered and unordered FP
  // predicates map to the same signed integer predicate.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}

void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  //
  BasicBlock *Header = L->getHeader();

  // Snapshot the header PHIs first (as WeakVHs), since HandleFloatingPointIV
  // may delete PHIs while we iterate.
  SmallVector<WeakVH, 8> PHIs;
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    PHIs.push_back(PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      HandleFloatingPointIV(L, PN);

  // If the loop previously had floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}

//===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// RewriteLoopExitValues - Check to see if this loop has a computable
/// loop-invariant execution count. If so, this means that we can compute the
/// final value of any expressions that are recurrent in the loop, and
/// substitute the exit values from the loop into any instructions outside of
/// the loop that use the final values of the current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Verify the input to the pass in already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBB = ExitBlocks[i];

    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    // All PHIs in a block share the same incoming edges, so reading the count
    // from the first one is sufficient.
    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      // SCEV only supports integer expressions for now.
      if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if :
        //  - it is definitely used inside the loop in a way which can not be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop
        if (ExitValue->getSCEVType()>=scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
          // Scan at most 6 uses; beyond that the heuristic gives up and
          // allows the rewrite.
          for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
               IB!=IE && NumUses<=6 ; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (Value::use_iterator PB=UseInstr->use_begin(),
                                         PE=UseInstr->use_end();
                     PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }

        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                     << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }
        Changed = true;
        ++NumReplaced;

        PN->setIncomingValue(i, ExitVal);

        // If this instruction is dead now, delete it. Don't do it now to avoid
        // invalidating iterators.
        if (isInstructionTriviallyDead(Inst, TLI))
          DeadInsts.push_back(Inst);

        if (NumPreds == 1) {
          // Completely replace a single-pred PHI. This is safe, because the
          // NewVal won't be variant in the loop, so we don't need an LCSSA phi
          // node anymore.
          PN->replaceAllUsesWith(ExitVal);
          PN->eraseFromParent();
        }
      }
      if (NumPreds != 1) {
        // Clone the PHI and delete the original one. This lets IVUsers and
        // any other maps purge the original user from their records.
        PHINode *NewPN = cast<PHINode>(PN->clone());
        NewPN->takeName(PN);
        NewPN->insertBefore(PN);
        PN->replaceAllUsesWith(NewPN);
        PN->eraseFromParent();
      }
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {
  // Collect information about induction variables that are used by sign/zero
  // extend operations. This information is recorded by CollectExtend and
  // provides the input to WidenIV.
  // WideIVInfo - Per-phi record of the widest sign- or zero-extended use of a
  // narrow induction variable. Filled in by WideIVVisitor::visitCast and
  // consumed by WidenIV.
  struct WideIVInfo {
    PHINode *NarrowIV;      // The narrow IV phi whose extends were analyzed.
    Type *WidestNativeType; // Widest integer type created [sz]ext
    bool IsSigned;          // Was an sext user seen before a zext?

    WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
  };

  // WideIVVisitor - IVVisitor callback used by simplifyUsersOfIV to record,
  // for one narrow IV, the widest target-legal integer type among its
  // sext/zext users.
  class WideIVVisitor : public IVVisitor {
    ScalarEvolution *SE;
    const DataLayout *TD; // May be null; used to reject non-native widths.

  public:
    WideIVInfo WI; // Result of the visitation; WidestNativeType stays null if
                   // no suitable extend was seen.

    WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
                  const DataLayout *TData) :
      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }

    // Implement the interface used by simplifyUsersOfIV.
    virtual void visitCast(CastInst *Cast);
  };
}

/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
void WideIVVisitor::visitCast(CastInst *Cast) {
  // Only sext/zext users are interesting; ignore every other cast.
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Skip widths the target cannot represent natively (when DataLayout is
  // available to tell us).
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (TD && !TD->isLegalInteger(Width))
    return;

  // First qualifying extend seen: it fixes both the candidate type and the
  // signedness for this IV.
  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  // Same signedness: keep the widest type seen so far.
  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
/// WideIV that computes the same value as the Narrow IV def. This avoids
/// caching Use* pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef; // Def in the narrow IV's def-use chain.
  Instruction *NarrowUse; // One user of NarrowDef.
  Instruction *WideDef;   // Wide value equivalent to NarrowDef.

  NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
    NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
};

/// WidenIV - The goal of this transform is to remove sign and zero extends
/// without creating any new induction variables. To do this, it creates a new
/// phi of the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
///
class WidenIV {
  // Parameters
  PHINode *OrigPhi; // The narrow IV being widened.
  Type *WideType;   // Target type, taken from WideIVInfo::WidestNativeType.
  bool IsSigned;    // Whether widening uses sext (true) or zext (false).

  // Context
  LoopInfo        *LI;
  Loop            *L;  // Loop whose header contains OrigPhi.
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Result
  PHINode *WidePhi;        // The wide IV created by CreateWideIV.
  Instruction *WideInc;    // The wide IV increment in the latch, if any.
  const SCEV *WideIncExpr; // SCEV of WideInc, for increment reuse.
  SmallVectorImpl<WeakVH> &DeadInsts; // Sink for instructions made dead.

  SmallPtrSet<Instruction*,16> Widened;        // Users already visited.
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; // Worklist of def-use edges.

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
          ScalarEvolution *SEv, DominatorTree *DTree,
          SmallVectorImpl<WeakVH> &DI) :
    OrigPhi(WI.NarrowIV),
    WideType(WI.WidestNativeType),
    IsSigned(WI.IsSigned),
    LI(LInfo),
    L(LI->getLoopFor(OrigPhi->getParent())),
    SE(SEv),
    DT(DTree),
    WidePhi(0),
    WideInc(0),
    WideIncExpr(0),
    DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
  }

  PHINode *CreateWideIV(SCEVExpander &Rewriter);

protected:
  Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
                   Instruction *Use);

  Instruction *CloneIVUser(NarrowIVDefUse DU);

  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);

  const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);

  Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace

/// isLoopInvariant - Perform a quick domtree based check for loop invariance
/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
/// gratuitous for this purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  // Non-instructions (constants, arguments) are trivially invariant.
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  // An instruction defined strictly before the loop header cannot vary within
  // the loop (given the caller's precondition that V is used inside it).
  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

// getExtend - Create (or position) a sign/zero extend of NarrowOper to
// WideType for use at Use. NOTE: the WideType/IsSigned parameters shadow the
// class members of the same names; the parameter values are what is used here.
Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// CloneIVUser - Instantiate a wide operation to replace a narrow
/// operation. This only needs to handle operations that can evaluation to
/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return 0;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");

    // Replace NarrowDef operands with WideDef. Otherwise, we don't know
    // anything about the narrow operand yet so must insert a [sz]ext. It is
    // probably loop invariant and will be folded or hoisted. If it actually
    // comes from a widened IV, it should be removed during a future call to
    // WidenIVUse.
    Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
      getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
    Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
      getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);

    // Build the wide clone next to the narrow use, carrying over any
    // no-wrap flags from the original binary operator.
    BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
    BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
                                                    LHS, RHS,
                                                    NarrowBO->getName());
    IRBuilder<> Builder(DU.NarrowUse);
    Builder.Insert(WideBO);
    if (const OverflowingBinaryOperator *OBO =
        dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
      if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
      if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
    }
    return WideBO;
  }
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  if (DU.NarrowUse->getOpcode() != Instruction::Add)
    return 0;

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  // The extension is only sound if the add carries the matching no-wrap flag
  // (nsw for sext, nuw for zext); otherwise bail out.
  const SCEV *ExtendOperExpr = 0;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  if (IsSigned && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (!IsSigned && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return 0;

  // When creating this AddExpr, don't apply the current operations NSW or NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));

  if (!AddRec || AddRec->getLoop() != L)
    return 0;
  return AddRec;
}

/// GetWideRecurrence - Is this instruction potentially interesting from
/// IVUsers' perspective after widening it's type? In other words, can the
/// extend be safely hoisted out of the loop with SCEV reducing the value to a
/// recurrence on the same loop. If so, return the sign or zero extended
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
  if (!SE->isSCEVable(NarrowUse->getType()))
    return 0;

  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType())
      >= SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return 0;
  }

  // Extend the user's expression and see whether SCEV still reduces it to an
  // add-recurrence on this loop.
  const SCEV *WideExpr = IsSigned ?
    SE->getSignExtendExpr(NarrowExpr, WideType) :
    SE->getZeroExtendExpr(NarrowExpr, WideType);
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return 0;
  return AddRec;
}

/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user. Returning 0 means the
/// def-use chain should not be followed past this user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (isa<PHINode>(DU.NarrowUse) &&
      LI->getLoopFor(DU.NarrowUse->getParent()) != L)
    return 0;

  // Our raison d'etre! Eliminate sign and zero extension.
  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
              << " not wide enough to subsume " << *DU.NarrowUse << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
            << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.push_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose it's uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return 0;
  }

  // Does this user itself evaluate to a recurrence after widening?
  const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
  if (!WideAddRec) {
    WideAddRec = GetExtendedOperandRecurrence(DU);
  }
  if (!WideAddRec) {
    // This user does not evaluate to a recurence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
    Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
    DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
    return 0;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't to
  // insert a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = 0;
  if (WideAddRec == WideIncExpr
      && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = CloneIVUser(DU);
    if (!WideUse)
      return 0;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec != SE->getSCEV(WideUse)) {
    DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
    DeadInsts.push_back(WideUse);
    return 0;
  }

  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  for (Value::use_iterator UI = NarrowDef->use_begin(),
         UE = NarrowDef->use_end(); UI != UE; ++UI) {
    Instruction *NarrowUse = cast<Instruction>(*UI);

    // Handle data flow merges and bizarre phi cycles: skip users already
    // queued once (insert returns false for duplicates).
    if (!Widened.insert(NarrowUse))
      continue;

    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
  }
}

/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return NULL;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = IsSigned ?
    SE->getSignExtendExpr(AddRec, WideType) :
    SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return NULL;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
         SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
         && "Loop header phi recurrence inputs do not dominate the loop");

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = L->getHeader()->begin();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
  }

  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = WidenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // WidenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.push_back(DU.NarrowDef);
  }
  return WidePhi;
}

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//


/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
void IndVarSimplify::SimplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LPPassManager &LPM) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Seed the worklist with every phi in the loop header.
  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to SimplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      WideIVVisitor WIV(CurrIV, SE, TD);

      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);

      // Remember phis that have a qualifying extend user for the widening
      // phase below.
      if (WIV.WI.WidestNativeType) {
        WideIVs.push_back(WIV.WI);
      }
    } while(!LoopPhis.empty());

    // Widen the collected IVs; each successfully widened phi re-enters the
    // simplification worklist.
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Check for expressions that ScalarEvolution generates to compute
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
                                SmallPtrSet<const SCEV*, 8> &Processed,
                                ScalarEvolution *SE) {
  // Only examine each subexpression once (insert returns false for repeats).
  if (!Processed.insert(S))
    return false;

  // If the backedge-taken count is a UDiv, it's very likely a UDiv that
  // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
  // precise expression, rather than a UDiv from the user's code. If we can't
  // find a UDiv in the code with some simple searching, assume the former and
  // forego rewriting the loop.
  if (isa<SCEVUDivExpr>(S)) {
    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
    if (!OrigCond) return true;
    // Match S against (operand - 1) of either side of the exit compare.
    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
    if (R != S) {
      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
      if (L != S)
        return true;
    }
  }

  // Recurse past add expressions, which commonly occur in the
  // BackedgeTakenCount. They may already exist in program code, and if not,
  // they are not too expensive rematerialize.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      if (isHighCostExpansion(*I, BI, Processed, SE))
        return true;
    }
    return false;
  }

  // HowManyLessThans uses a Max expression whenever the loop is not guarded by
  // the exit condition.
  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
    return true;

  // If we haven't recognized an expensive SCEV pattern, assume it's an
  // expression produced by program code.
  return false;
}

/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
/// we could skip this check in the case that the LFTR loop counter (chosen by
/// FindLoopCounter) is also pointer type. Instead, we could directly convert
/// the loop test to an inequality test by checking the target data's alignment
/// of element types (given that the initial pointer value originates from or is
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
      BackedgeTakenCount->isZero())
    return false;

  // LFTR needs a single exiting block.
  if (!L->getExitingBlock())
    return false;

  // Can't rewrite non-branch yet.
  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  if (!BI)
    return false;

  SmallPtrSet<const SCEV*, 8> Processed;
  if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
    return false;

  return true;
}

/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
1225313571Smmstatic PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { 1226313571Smm Instruction *IncI = dyn_cast<Instruction>(IncV); 1227313571Smm if (!IncI) 1228313571Smm return 0; 1229313571Smm 1230313571Smm switch (IncI->getOpcode()) { 1231313571Smm case Instruction::Add: 1232313571Smm case Instruction::Sub: 1233313571Smm break; 1234313571Smm case Instruction::GetElementPtr: 1235313571Smm // An IV counter must preserve its type. 1236313571Smm if (IncI->getNumOperands() == 2) 1237313571Smm break; 1238313571Smm default: 1239313571Smm return 0; 1240313571Smm } 1241313571Smm 1242313571Smm PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1243313571Smm if (Phi && Phi->getParent() == L->getHeader()) { 1244313571Smm if (isLoopInvariant(IncI->getOperand(1), L, DT)) 1245313571Smm return Phi; 1246313571Smm return 0; 1247313571Smm } 1248313571Smm if (IncI->getOpcode() == Instruction::GetElementPtr) 1249313571Smm return 0; 1250313571Smm 1251313571Smm // Allow add/sub to be commuted. 1252313571Smm Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 1253313571Smm if (Phi && Phi->getParent() == L->getHeader()) { 1254313571Smm if (isLoopInvariant(IncI->getOperand(0), L, DT)) 1255313571Smm return Phi; 1256313571Smm } 1257313571Smm return 0; 1258313571Smm} 1259313571Smm 1260313571Smm/// Return the compare guarding the loop latch, or NULL for unrecognized tests. 1261313571Smmstatic ICmpInst *getLoopTest(Loop *L) { 1262313571Smm assert(L->getExitingBlock() && "expected loop exit"); 1263313571Smm 1264313571Smm BasicBlock *LatchBlock = L->getLoopLatch(); 1265313571Smm // Don't bother with LFTR if the loop is not properly simplified. 
1266313571Smm if (!LatchBlock) 1267313571Smm return 0; 1268313571Smm 1269313571Smm BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1270313571Smm assert(BI && "expected exit branch"); 1271313571Smm 1272313571Smm return dyn_cast<ICmpInst>(BI->getCondition()); 1273313571Smm} 1274313571Smm 1275313571Smm/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show 1276313571Smm/// that the current exit test is already sufficiently canonical. 1277313571Smmstatic bool needsLFTR(Loop *L, DominatorTree *DT) { 1278313571Smm // Do LFTR to simplify the exit condition to an ICMP. 1279313571Smm ICmpInst *Cond = getLoopTest(L); 1280313571Smm if (!Cond) 1281313571Smm return true; 1282313571Smm 1283313571Smm // Do LFTR to simplify the exit ICMP to EQ/NE 1284313571Smm ICmpInst::Predicate Pred = Cond->getPredicate(); 1285313571Smm if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 1286313571Smm return true; 1287313571Smm 1288313571Smm // Look for a loop invariant RHS 1289313571Smm Value *LHS = Cond->getOperand(0); 1290313571Smm Value *RHS = Cond->getOperand(1); 1291313571Smm if (!isLoopInvariant(RHS, L, DT)) { 1292313571Smm if (!isLoopInvariant(LHS, L, DT)) 1293313571Smm return true; 1294313571Smm std::swap(LHS, RHS); 1295313571Smm } 1296313571Smm // Look for a simple IV counter LHS 1297313571Smm PHINode *Phi = dyn_cast<PHINode>(LHS); 1298313571Smm if (!Phi) 1299313571Smm Phi = getLoopPhiForCounter(LHS, L, DT); 1300313571Smm 1301313571Smm if (!Phi) 1302313571Smm return true; 1303313571Smm 1304313571Smm // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 1305313571Smm int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1306313571Smm if (Idx < 0) 1307313571Smm return true; 1308313571Smm 1309313571Smm // Do LFTR if the exit condition's IV is *not* a simple counter. 
1310313571Smm Value *IncV = Phi->getIncomingValue(Idx); 1311313571Smm return Phi != getLoopPhiForCounter(IncV, L, DT); 1312313571Smm} 1313313571Smm 1314313571Smm/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 1315313571Smm/// down to checking that all operands are constant and listing instructions 1316313571Smm/// that may hide undef. 1317313571Smmstatic bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited, 1318313571Smm unsigned Depth) { 1319313571Smm if (isa<Constant>(V)) 1320313571Smm return !isa<UndefValue>(V); 1321313571Smm 1322313571Smm if (Depth >= 6) 1323313571Smm return false; 1324313571Smm 1325313571Smm // Conservatively handle non-constant non-instructions. For example, Arguments 1326313571Smm // may be undef. 1327313571Smm Instruction *I = dyn_cast<Instruction>(V); 1328313571Smm if (!I) 1329313571Smm return false; 1330313571Smm 1331313571Smm // Load and return values may be undef. 1332313571Smm if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 1333313571Smm return false; 1334231200Smm 1335313571Smm // Optimistically handle other instructions. 1336231200Smm for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { 1337313571Smm if (!Visited.insert(*OI)) 1338231200Smm continue; 1339313571Smm if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) 1340313571Smm return false; 1341313571Smm } 1342313571Smm return true; 1343313571Smm} 1344313571Smm 1345313571Smm/// Return true if the given value is concrete. We must prove that undef can 1346313571Smm/// never reach it. 1347313571Smm/// 1348313571Smm/// TODO: If we decide that this is a good approach to checking for undef, we 1349313571Smm/// may factor it into a common location. 
1350313571Smmstatic bool hasConcreteDef(Value *V) { 1351313571Smm SmallPtrSet<Value*, 8> Visited; 1352313571Smm Visited.insert(V); 1353313571Smm return hasConcreteDefImpl(V, Visited, 0); 1354313571Smm} 1355313571Smm 1356313571Smm/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to 1357313571Smm/// be rewritten) loop exit test. 1358313571Smmstatic bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 1359313571Smm int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1360313571Smm Value *IncV = Phi->getIncomingValue(LatchIdx); 1361313571Smm 1362313571Smm for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); 1363313571Smm UI != UE; ++UI) { 1364313571Smm if (*UI != Cond && *UI != IncV) return false; 1365313571Smm } 1366313571Smm 1367313571Smm for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); 1368313571Smm UI != UE; ++UI) { 1369313571Smm if (*UI != Cond && *UI != Phi) return false; 1370313571Smm } 1371313571Smm return true; 1372313571Smm} 1373231200Smm 1374313571Smm/// FindLoopCounter - Find an affine IV in canonical form. 1375231200Smm/// 1376313571Smm/// BECount may be an i8* pointer type. The pointer difference is already 1377313571Smm/// valid count without scaling the address stride, so it remains a pointer 1378313571Smm/// expression as far as SCEV is concerned. 1379313571Smm/// 1380313571Smm/// Currently only valid for LFTR. See the comments on hasConcreteDef below. 1381231200Smm/// 1382313571Smm/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount 1383313571Smm/// 1384313571Smm/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. 1385313571Smm/// This is difficult in general for SCEV because of potential overflow. But we 1386313571Smm/// could at least handle constant BECounts. 
1387231200Smmstatic PHINode * 1388231200SmmFindLoopCounter(Loop *L, const SCEV *BECount, 1389231200Smm ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) { 1390231200Smm uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 1391231200Smm 1392231200Smm Value *Cond = 1393231200Smm cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); 1394231200Smm 1395231200Smm // Loop over all of the PHI nodes, looking for a simple counter. 1396231200Smm PHINode *BestPhi = 0; 1397231200Smm const SCEV *BestInit = 0; 1398231200Smm BasicBlock *LatchBlock = L->getLoopLatch(); 1399231200Smm assert(LatchBlock && "needsLFTR should guarantee a loop latch"); 1400231200Smm 1401368708Smm for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1402231200Smm PHINode *Phi = cast<PHINode>(I); 1403231200Smm if (!SE->isSCEVable(Phi->getType())) 1404368708Smm continue; 1405231200Smm 1406231200Smm // Avoid comparing an integer IV against a pointer Limit. 1407231200Smm if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 1408368708Smm continue; 1409231200Smm 1410231200Smm const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1411231200Smm if (!AR || AR->getLoop() != L || !AR->isAffine()) 1412231200Smm continue; 1413231200Smm 1414231200Smm // AR may be a pointer type, while BECount is an integer type. 1415231200Smm // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 1416231200Smm // AR may not be a narrower type, or we may never exit. 
1417231200Smm uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 1418231200Smm if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) 1419231200Smm continue; 1420231200Smm 1421231200Smm const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 1422231200Smm if (!Step || !Step->isOne()) 1423231200Smm continue; 1424231200Smm 1425231200Smm int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1426231200Smm Value *IncV = Phi->getIncomingValue(LatchIdx); 1427231200Smm if (getLoopPhiForCounter(IncV, L, DT) != Phi) 1428231200Smm continue; 1429231200Smm 1430231200Smm // Avoid reusing a potentially undef value to compute other values that may 1431231200Smm // have originally had a concrete definition. 1432313571Smm if (!hasConcreteDef(Phi)) { 1433231200Smm // We explicitly allow unknown phis as long as they are already used by 1434231200Smm // the loop test. In this case we assume that performing LFTR could not 1435313571Smm // increase the number of undef users. 1436231200Smm if (ICmpInst *Cond = getLoopTest(L)) { 1437231200Smm if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) 1438313571Smm && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) { 1439231200Smm continue; 1440231200Smm } 1441313571Smm } 1442231200Smm } 1443231200Smm const SCEV *Init = AR->getStart(); 1444231200Smm 1445231200Smm if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1446231200Smm // Don't force a live loop counter if another IV can be used. 1447231200Smm if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1448231200Smm continue; 1449231200Smm 1450231200Smm // Prefer to count-from-zero. This is a more "canonical" counter form. It 1451313571Smm // also prefers integer to pointer IVs. 
1452313571Smm if (BestInit->isZero() != Init->isZero()) { 1453313571Smm if (BestInit->isZero()) 1454313571Smm continue; 1455313571Smm } 1456313571Smm // If two IVs both count from zero or both count from nonzero then the 1457313571Smm // narrower is likely a dead phi that has been widened. Use the wider phi 1458313927Smm // to allow the other to be eliminated. 1459313571Smm else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1460313571Smm continue; 1461313571Smm } 1462313571Smm BestPhi = Phi; 1463313571Smm BestInit = Init; 1464313571Smm } 1465313571Smm return BestPhi; 1466313571Smm} 1467313571Smm 1468313571Smm/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that 1469313571Smm/// holds the RHS of the new loop test. 1470313571Smmstatic Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, 1471313571Smm SCEVExpander &Rewriter, ScalarEvolution *SE) { 1472313571Smm const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 1473313571Smm assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); 1474313571Smm const SCEV *IVInit = AR->getStart(); 1475313571Smm 1476313571Smm // IVInit may be a pointer while IVCount is an integer when FindLoopCounter 1477313571Smm // finds a valid pointer IV. Sign extend BECount in order to materialize a 1478313571Smm // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 1479313571Smm // the existing GEPs whenever possible. 1480313571Smm if (IndVar->getType()->isPointerTy() 1481313571Smm && !IVCount->getType()->isPointerTy()) { 1482313571Smm 1483313571Smm // IVOffset will be the new GEP offset that is interpreted by GEP as a 1484313571Smm // signed value. IVCount on the other hand represents the loop trip count, 1485313571Smm // which is an unsigned value. FindLoopCounter only allows induction 1486313571Smm // variables that have a positive unit stride of one. 
This means we don't 1487313571Smm // have to handle the case of negative offsets (yet) and just need to zero 1488313571Smm // extend IVCount. 1489313571Smm Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 1490313571Smm const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy); 1491313571Smm 1492313571Smm // Expand the code for the iteration count. 1493313571Smm assert(SE->isLoopInvariant(IVOffset, L) && 1494313571Smm "Computed iteration count is not loop invariant!"); 1495313571Smm BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1496313571Smm Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI); 1497313571Smm 1498313571Smm Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); 1499313571Smm assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter"); 1500313571Smm // We could handle pointer IVs other than i8*, but we need to compensate for 1501313571Smm // gep index scaling. See canExpandBackedgeTakenCount comments. 1502313571Smm assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 1503313571Smm cast<PointerType>(GEPBase->getType())->getElementType())->isOne() 1504313571Smm && "unit stride pointer IV must be i8*"); 1505313571Smm 1506313571Smm IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); 1507313571Smm return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); 1508313571Smm } 1509313571Smm else { 1510313571Smm // In any other case, convert both IVInit and IVCount to integers before 1511313571Smm // comparing. This may result in SCEV expension of pointers, but in practice 1512313571Smm // SCEV will fold the pointer arithmetic away as such: 1513313571Smm // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 1514313571Smm // 1515313571Smm // Valid Cases: (1) both integers is most common; (2) both may be pointers 1516313571Smm // for simple memset-style loops. 
1517313571Smm // 1518313571Smm // IVInit integer and IVCount pointer would only occur if a canonical IV 1519313571Smm // were generated on top of case #2, which is not expected. 1520313571Smm 1521313927Smm const SCEV *IVLimit = 0; 1522313571Smm // For unit stride, IVCount = Start + BECount with 2's complement overflow. 1523313571Smm // For non-zero Start, compute IVCount here. 1524313571Smm if (AR->getStart()->isZero()) 1525313571Smm IVLimit = IVCount; 1526313571Smm else { 1527313571Smm assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 1528313571Smm const SCEV *IVInit = AR->getStart(); 1529313571Smm 1530313571Smm // For integer IVs, truncate the IV before computing IVInit + BECount. 1531313571Smm if (SE->getTypeSizeInBits(IVInit->getType()) 1532313571Smm > SE->getTypeSizeInBits(IVCount->getType())) 1533313571Smm IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); 1534313571Smm 1535313571Smm IVLimit = SE->getAddExpr(IVInit, IVCount); 1536313571Smm } 1537313571Smm // Expand the code for the iteration count. 1538313571Smm BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1539313571Smm IRBuilder<> Builder(BI); 1540313571Smm assert(SE->isLoopInvariant(IVLimit, L) && 1541313571Smm "Computed iteration count is not loop invariant!"); 1542313571Smm // Ensure that we generate the same type as IndVar, or a smaller integer 1543313571Smm // type. In the presence of null pointer values, we have an integer type 1544313571Smm // SCEV expression (IVInit) for a pointer type IV value (IndVar). 1545313571Smm Type *LimitTy = IVCount->getType()->isPointerTy() ? 1546313571Smm IndVar->getType() : IVCount->getType(); 1547313571Smm return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 1548313571Smm } 1549313571Smm} 1550313571Smm 1551313571Smm/// LinearFunctionTestReplace - This method rewrites the exit condition of the 1552313571Smm/// loop to be a canonical != comparison against the incremented loop induction 1553313571Smm/// variable. 
This pass is able to rewrite the exit tests of any loop where the 1554313571Smm/// SCEV analysis can determine a loop-invariant trip count of the loop, which 1555313571Smm/// is actually a much broader range than just linear tests. 1556313571SmmValue *IndVarSimplify:: 1557231200SmmLinearFunctionTestReplace(Loop *L, 1558231200Smm const SCEV *BackedgeTakenCount, 1559231200Smm PHINode *IndVar, 1560231200Smm SCEVExpander &Rewriter) { 1561231200Smm assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); 1562231200Smm 1563231200Smm // Initialize CmpIndVar and IVCount to their preincremented values. 1564231200Smm Value *CmpIndVar = IndVar; 1565231200Smm const SCEV *IVCount = BackedgeTakenCount; 1566231200Smm 1567231200Smm // If the exiting block is the same as the backedge block, we prefer to 1568231200Smm // compare against the post-incremented value, otherwise we must compare 1569231200Smm // against the preincremented value. 1570231200Smm if (L->getExitingBlock() == L->getLoopLatch()) { 1571231200Smm // Add one to the "backedge-taken" count to get the trip count. 1572231200Smm // This addition may overflow, which is valid as long as the comparison is 1573231200Smm // truncated to BackedgeTakenCount->getType(). 1574231200Smm IVCount = SE->getAddExpr(BackedgeTakenCount, 1575231200Smm SE->getConstant(BackedgeTakenCount->getType(), 1)); 1576231200Smm // The BackedgeTaken expression contains the number of times that the 1577231200Smm // backedge branches to the loop header. This is one less than the 1578231200Smm // number of times the loop executes, so use the incremented indvar. 
1579231200Smm CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); 1580231200Smm } 1581231200Smm 1582231200Smm Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); 1583231200Smm assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() 1584231200Smm && "genLoopLimit missed a cast"); 1585231200Smm 1586231200Smm // Insert a new icmp_ne or icmp_eq instruction before the branch. 1587231200Smm BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1588231200Smm ICmpInst::Predicate P; 1589231200Smm if (L->contains(BI->getSuccessor(0))) 1590231200Smm P = ICmpInst::ICMP_NE; 1591231200Smm else 1592231200Smm P = ICmpInst::ICMP_EQ; 1593231200Smm 1594313571Smm DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 1595313571Smm << " LHS:" << *CmpIndVar << '\n' 1596313571Smm << " op:\t" 1597313571Smm << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" 1598313571Smm << " RHS:\t" << *ExitCnt << "\n" 1599313571Smm << " IVCount:\t" << *IVCount << "\n"); 1600313571Smm 1601313571Smm IRBuilder<> Builder(BI); 1602313571Smm 1603231200Smm // LFTR can ignore IV overflow and truncate to the width of 1604231200Smm // BECount. This avoids materializing the add(zext(add)) expression. 1605313571Smm unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 1606313571Smm unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 1607231200Smm if (CmpIndVarSize > ExitCntSize) { 1608231200Smm const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 1609231200Smm const SCEV *ARStart = AR->getStart(); 1610231200Smm const SCEV *ARStep = AR->getStepRecurrence(*SE); 1611313571Smm // For constant IVCount, avoid truncation. 
1612231200Smm if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) { 1613313571Smm const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue(); 1614313571Smm APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue(); 1615313571Smm // Note that the post-inc value of BackedgeTakenCount may have overflowed 1616313571Smm // above such that IVCount is now zero. 1617231200Smm if (IVCount != BackedgeTakenCount && Count == 0) { 1618231200Smm Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize); 1619313571Smm ++Count; 1620313571Smm } 1621313571Smm else 1622328828Smm Count = Count.zext(CmpIndVarSize); 1623313571Smm APInt NewLimit; 1624313571Smm if (cast<SCEVConstant>(ARStep)->getValue()->isNegative()) 1625313571Smm NewLimit = Start - Count; 1626313571Smm else 1627313571Smm NewLimit = Start + Count; 1628313571Smm ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit); 1629313571Smm 1630313571Smm DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n"); 1631313571Smm } else { 1632313571Smm CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 1633313571Smm "lftr.wideiv"); 1634313571Smm } 1635313571Smm } 1636313571Smm Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 1637231200Smm Value *OrigCond = BI->getCondition(); 1638231200Smm // It's tempting to use replaceAllUsesWith here to fully replace the old 1639231200Smm // comparison, but that's not immediately safe, since users of the old 1640231200Smm // comparison may not be dominated by the new comparison. Instead, just 1641231200Smm // update the branch to use the new comparison; in the common case this 1642231200Smm // will make old comparison dead. 
1643231200Smm BI->setCondition(Cond); 1644231200Smm DeadInsts.push_back(OrigCond); 1645231200Smm 1646313571Smm ++NumLFTR; 1647231200Smm Changed = true; 1648231200Smm return Cond; 1649231200Smm} 1650231200Smm 1651231200Smm//===----------------------------------------------------------------------===// 1652231200Smm// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. 1653231200Smm//===----------------------------------------------------------------------===// 1654313571Smm 1655231200Smm/// If there's a single exit block, sink any loop-invariant values that 1656231200Smm/// were defined in the preheader but not used inside the loop into the 1657313571Smm/// exit block to reduce register pressure in the loop. 1658313571Smmvoid IndVarSimplify::SinkUnusedInvariants(Loop *L) { 1659313571Smm BasicBlock *ExitBlock = L->getExitBlock(); 1660313571Smm if (!ExitBlock) return; 1661313571Smm 1662313571Smm BasicBlock *Preheader = L->getLoopPreheader(); 1663313571Smm if (!Preheader) return; 1664231200Smm 1665313571Smm Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); 1666313571Smm BasicBlock::iterator I = Preheader->getTerminator(); 1667231200Smm while (I != Preheader->begin()) { 1668313571Smm --I; 1669313571Smm // New instructions were inserted at the end of the preheader. 1670313571Smm if (isa<PHINode>(I)) 1671313571Smm break; 1672313571Smm 1673313571Smm // Don't move instructions which might have side effects, since the side 1674313571Smm // effects need to complete before instructions inside the loop. Also don't 1675313571Smm // move instructions which might read memory, since the loop may modify 1676313571Smm // memory. Note that it's okay if the instruction might have undefined 1677313571Smm // behavior: LoopSimplify guarantees that the preheader dominates the exit 1678313571Smm // block. 1679313571Smm if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1680313571Smm continue; 1681313571Smm 1682313571Smm // Skip debug info intrinsics. 
1683313571Smm if (isa<DbgInfoIntrinsic>(I)) 1684313571Smm continue; 1685313571Smm 1686313571Smm // Skip landingpad instructions. 1687313571Smm if (isa<LandingPadInst>(I)) 1688313571Smm continue; 1689231200Smm 1690313571Smm // Don't sink alloca: we never want to sink static alloca's out of the 1691313571Smm // entry block, and correctly sinking dynamic alloca's requires 1692313571Smm // checks for stacksave/stackrestore intrinsics. 1693313571Smm // FIXME: Refactor this check somehow? 1694313571Smm if (isa<AllocaInst>(I)) 1695313571Smm continue; 1696313571Smm 1697313571Smm // Determine if there is a use in or before the loop (direct or 1698313571Smm // otherwise). 1699313571Smm bool UsedInLoop = false; 1700313571Smm for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 1701313571Smm UI != UE; ++UI) { 1702313571Smm User *U = *UI; 1703313571Smm BasicBlock *UseBB = cast<Instruction>(U)->getParent(); 1704313571Smm if (PHINode *P = dyn_cast<PHINode>(U)) { 1705313571Smm unsigned i = 1706313571Smm PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); 1707313571Smm UseBB = P->getIncomingBlock(i); 1708313571Smm } 1709313571Smm if (UseBB == Preheader || L->contains(UseBB)) { 1710313571Smm UsedInLoop = true; 1711313571Smm break; 1712313571Smm } 1713313571Smm } 1714313571Smm 1715313571Smm // If there is, the def must remain in the preheader. 1716313571Smm if (UsedInLoop) 1717313571Smm continue; 1718313571Smm 1719313571Smm // Otherwise, sink it to the exit block. 1720313571Smm Instruction *ToMove = I; 1721313571Smm bool Done = false; 1722313571Smm 1723313571Smm if (I != Preheader->begin()) { 1724313571Smm // Skip debug info intrinsics. 
1725313571Smm do { 1726313571Smm --I; 1727313571Smm } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 1728313571Smm 1729313571Smm if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 1730313571Smm Done = true; 1731313571Smm } else { 1732313571Smm Done = true; 1733313571Smm } 1734313571Smm 1735313571Smm ToMove->moveBefore(InsertPt); 1736313571Smm if (Done) break; 1737313571Smm InsertPt = ToMove; 1738313571Smm } 1739313571Smm} 1740313571Smm 1741313571Smm//===----------------------------------------------------------------------===// 1742313571Smm// IndVarSimplify driver. Manage several subpasses of IV simplification. 1743313571Smm//===----------------------------------------------------------------------===// 1744313571Smm 1745313571Smmbool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { 1746313571Smm // If LoopSimplify form is not available, stay out of trouble. Some notes: 1747313571Smm // - LSR currently only supports LoopSimplify-form loops. Indvars' 1748313571Smm // canonicalization can be a pessimization without LSR to "clean up" 1749313571Smm // afterwards. 1750313571Smm // - We depend on having a preheader; in particular, 1751313571Smm // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1752313571Smm // and we're in trouble if we can't find the induction variable even when 1753313571Smm // we've manually inserted one. 1754313571Smm if (!L->isLoopSimplifyForm()) 1755313571Smm return false; 1756313571Smm 1757313571Smm LI = &getAnalysis<LoopInfo>(); 1758313571Smm SE = &getAnalysis<ScalarEvolution>(); 1759313571Smm DT = &getAnalysis<DominatorTree>(); 1760313571Smm TD = getAnalysisIfAvailable<DataLayout>(); 1761313571Smm TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); 1762313571Smm 1763313571Smm DeadInsts.clear(); 1764313571Smm Changed = false; 1765313571Smm 1766313571Smm // If there are any floating-point recurrences, attempt to 1767313571Smm // transform them to use integer recurrences. 
1768313571Smm RewriteNonIntegerIVs(L); 1769313571Smm 1770313571Smm const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1771313571Smm 1772313571Smm // Create a rewriter object which we'll use to transform the code with. 1773313571Smm SCEVExpander Rewriter(*SE, "indvars"); 1774313571Smm#ifndef NDEBUG 1775313571Smm Rewriter.setDebugType(DEBUG_TYPE); 1776313571Smm#endif 1777313571Smm 1778313571Smm // Eliminate redundant IV users. 1779313571Smm // 1780313571Smm // Simplification works best when run before other consumers of SCEV. We 1781313571Smm // attempt to avoid evaluating SCEVs for sign/zero extend operations until 1782313571Smm // other expressions involving loop IVs have been evaluated. This helps SCEV 1783313571Smm // set no-wrap flags before normalizing sign/zero extension. 1784313571Smm Rewriter.disableCanonicalMode(); 1785313571Smm SimplifyAndExtend(L, Rewriter, LPM); 1786313571Smm 1787313571Smm // Check to see if this loop has a computable loop-invariant execution count. 1788313571Smm // If so, this means that we can compute the final value of any expressions 1789313571Smm // that are recurrent in the loop, and substitute the exit values from the 1790313571Smm // loop into any instructions outside of the loop that use the final values of 1791313571Smm // the current expressions. 1792313571Smm // 1793313571Smm if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 1794313571Smm RewriteLoopExitValues(L, Rewriter); 1795313571Smm 1796313571Smm // Eliminate redundant IV cycles. 1797313571Smm NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 1798313571Smm 1799313571Smm // If we have a trip count expression, rewrite the loop's exit condition 1800313571Smm // using it. We can currently only handle loops with a single exit. 
1801313571Smm if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { 1802313571Smm PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); 1803313571Smm if (IndVar) { 1804313571Smm // Check preconditions for proper SCEVExpander operation. SCEV does not 1805313571Smm // express SCEVExpander's dependencies, such as LoopSimplify. Instead any 1806231200Smm // pass that uses the SCEVExpander must do it. This does not work well for 1807313571Smm // loop passes because SCEVExpander makes assumptions about all loops, while 1808231200Smm // LoopPassManager only forces the current loop to be simplified. 1809313571Smm // 1810231200Smm // FIXME: SCEV expansion has no way to bail out, so the caller must 1811313571Smm // explicitly check any assumptions made by SCEV. Brittle. 1812313571Smm const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); 1813313571Smm if (!AR || AR->getLoop()->getLoopPreheader()) 1814313571Smm (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, 1815313571Smm Rewriter); 1816313571Smm } 1817313571Smm } 1818313571Smm // Clear the rewriter cache, because values that are in the rewriter's cache 1819313571Smm // can be deleted in the loop below, causing the AssertingVH in the cache to 1820313571Smm // trigger. 1821313571Smm Rewriter.clear(); 1822313571Smm 1823313571Smm // Now that we're done iterating through lists, clean up any instructions 1824313571Smm // which are now dead. 1825313571Smm while (!DeadInsts.empty()) 1826313571Smm if (Instruction *Inst = 1827313571Smm dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) 1828313571Smm RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 1829313571Smm 1830313571Smm // The Rewriter may not be used from this point on. 1831313571Smm 1832313571Smm // Loop-invariant instructions in the preheader that aren't used in the 1833313571Smm // loop may be sunk below the loop to reduce register pressure. 
1834313571Smm SinkUnusedInvariants(L); 1835313571Smm 1836313571Smm // Clean up dead instructions. 1837313571Smm Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 1838313571Smm // Check a post-condition. 1839313571Smm assert(L->isLCSSAForm(*DT) && 1840313571Smm "Indvars did not leave the loop in lcssa form!"); 1841313571Smm 1842313571Smm // Verify that LFTR, and any other change have not interfered with SCEV's 1843313571Smm // ability to compute trip count. 1844313571Smm#ifndef NDEBUG 1845313571Smm if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 1846231200Smm SE->forgetLoop(L); 1847231200Smm const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 1848231200Smm if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 1849231200Smm SE->getTypeSizeInBits(NewBECount->getType())) 1850231200Smm NewBECount = SE->getTruncateOrNoop(NewBECount, 1851231200Smm BackedgeTakenCount->getType()); 1852231200Smm else 1853313571Smm BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 1854231200Smm NewBECount->getType()); 1855313571Smm assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 1856313571Smm } 1857313571Smm#endif 1858313571Smm 1859231200Smm return Changed; 1860231200Smm} 1861231200Smm