1243789Sdim//===-- StackColoring.cpp -------------------------------------------------===// 2243789Sdim// 3243789Sdim// The LLVM Compiler Infrastructure 4243789Sdim// 5243789Sdim// This file is distributed under the University of Illinois Open Source 6243789Sdim// License. See LICENSE.TXT for details. 7243789Sdim// 8243789Sdim//===----------------------------------------------------------------------===// 9243789Sdim// 10243789Sdim// This pass implements the stack-coloring optimization that looks for 11243789Sdim// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END), 12243789Sdim// which represent the possible lifetime of stack slots. It attempts to 13243789Sdim// merge disjoint stack slots and reduce the used stack space. 14243789Sdim// NOTE: This pass is not StackSlotColoring, which optimizes spill slots. 15243789Sdim// 16243789Sdim// TODO: In the future we plan to improve stack coloring in the following ways: 17243789Sdim// 1. Allow merging multiple small slots into a single larger slot at different 18243789Sdim// offsets. 19243789Sdim// 2. Merge this pass with StackSlotColoring and allow merging of allocas with 20243789Sdim// spill slots. 21243789Sdim// 22243789Sdim//===----------------------------------------------------------------------===// 23243789Sdim 24243789Sdim#define DEBUG_TYPE "stackcoloring" 25252723Sdim#include "llvm/CodeGen/Passes.h" 26243789Sdim#include "llvm/ADT/BitVector.h" 27243789Sdim#include "llvm/ADT/DepthFirstIterator.h" 28243789Sdim#include "llvm/ADT/PostOrderIterator.h" 29243789Sdim#include "llvm/ADT/SetVector.h" 30243789Sdim#include "llvm/ADT/SmallPtrSet.h" 31243789Sdim#include "llvm/ADT/SparseSet.h" 32243789Sdim#include "llvm/ADT/Statistic.h" 33252723Sdim#include "llvm/Analysis/Dominators.h" 34252723Sdim#include "llvm/Analysis/ValueTracking.h" 35243789Sdim#include "llvm/CodeGen/LiveInterval.h" 36252723Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 37243789Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 38243789Sdim#include "llvm/CodeGen/MachineDominators.h" 39252723Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 40243789Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 41243789Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 42252723Sdim#include "llvm/CodeGen/MachineMemOperand.h" 43243789Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 44243789Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 45263509Sdim#include "llvm/CodeGen/PseudoSourceValue.h" 46243789Sdim#include "llvm/CodeGen/SlotIndexes.h" 47243789Sdim#include "llvm/DebugInfo.h" 48252723Sdim#include "llvm/IR/Function.h" 49252723Sdim#include "llvm/IR/Instructions.h" 50252723Sdim#include "llvm/IR/Module.h" 51243789Sdim#include "llvm/MC/MCInstrItineraries.h" 52243789Sdim#include "llvm/Support/CommandLine.h" 53243789Sdim#include "llvm/Support/Debug.h" 54243789Sdim#include "llvm/Support/raw_ostream.h" 55252723Sdim#include "llvm/Target/TargetInstrInfo.h" 56252723Sdim#include "llvm/Target/TargetRegisterInfo.h" 57243789Sdim 58243789Sdimusing namespace llvm; 59243789Sdim 60243789Sdimstatic cl::opt<bool> 61243789SdimDisableColoring("no-stack-coloring", 62243789Sdim cl::init(false), cl::Hidden, 63243789Sdim cl::desc("Disable stack coloring")); 64243789Sdim 65243789Sdim/// The user may write code that uses allocas outside of the declared lifetime 66243789Sdim/// zone. This can happen when the user returns a reference to a local 67243789Sdim/// data-structure. We can detect these cases and decide not to optimize the 68243789Sdim/// code. If this flag is enabled, we try to save the user. 69243789Sdimstatic cl::opt<bool> 70243789SdimProtectFromEscapedAllocas("protect-from-escaped-allocas", 71252723Sdim cl::init(false), cl::Hidden, 72252723Sdim cl::desc("Do not optimize lifetime zones that " 73252723Sdim "are broken")); 74243789Sdim 75243789SdimSTATISTIC(NumMarkerSeen, "Number of lifetime markers found."); 76243789SdimSTATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); 77243789SdimSTATISTIC(StackSlotMerged, "Number of stack slot merged."); 78252723SdimSTATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); 79243789Sdim 80243789Sdim//===----------------------------------------------------------------------===// 81243789Sdim// StackColoring Pass 82243789Sdim//===----------------------------------------------------------------------===// 83243789Sdim 84243789Sdimnamespace { 85243789Sdim/// StackColoring - A machine pass for merging disjoint stack allocations, 86243789Sdim/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. 87243789Sdimclass StackColoring : public MachineFunctionPass { 88243789Sdim MachineFrameInfo *MFI; 89243789Sdim MachineFunction *MF; 90243789Sdim 91243789Sdim /// A class representing liveness information for a single basic block. 92243789Sdim /// Each bit in the BitVector represents the liveness property 93243789Sdim /// for a different stack slot. 94243789Sdim struct BlockLifetimeInfo { 95243789Sdim /// Which slots BEGINs in each basic block. 96243789Sdim BitVector Begin; 97243789Sdim /// Which slots ENDs in each basic block. 98243789Sdim BitVector End; 99243789Sdim /// Which slots are marked as LIVE_IN, coming into each basic block. 100243789Sdim BitVector LiveIn; 101243789Sdim /// Which slots are marked as LIVE_OUT, coming out of each basic block. 102243789Sdim BitVector LiveOut; 103243789Sdim }; 104243789Sdim 105243789Sdim /// Maps active slots (per bit) for each basic block. 106252723Sdim typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap; 107252723Sdim LivenessMap BlockLiveness; 108243789Sdim 109243789Sdim /// Maps serial numbers to basic blocks. 110252723Sdim DenseMap<const MachineBasicBlock*, int> BasicBlocks; 111243789Sdim /// Maps basic blocks to a serial number. 112252723Sdim SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; 113243789Sdim 114243789Sdim /// Maps liveness intervals for each slot. 115243789Sdim SmallVector<LiveInterval*, 16> Intervals; 116243789Sdim /// VNInfo is used for the construction of LiveIntervals. 117243789Sdim VNInfo::Allocator VNInfoAllocator; 118243789Sdim /// SlotIndex analysis object. 119243789Sdim SlotIndexes *Indexes; 120243789Sdim 121243789Sdim /// The list of lifetime markers found. These markers are to be removed 122243789Sdim /// once the coloring is done. 123243789Sdim SmallVector<MachineInstr*, 8> Markers; 124243789Sdim 125243789Sdim /// SlotSizeSorter - A Sort utility for arranging stack slots according 126243789Sdim /// to their size. 127243789Sdim struct SlotSizeSorter { 128243789Sdim MachineFrameInfo *MFI; 129243789Sdim SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } 130243789Sdim bool operator()(int LHS, int RHS) { 131243789Sdim // We use -1 to denote a uninteresting slot. Place these slots at the end. 132243789Sdim if (LHS == -1) return false; 133243789Sdim if (RHS == -1) return true; 134243789Sdim // Sort according to size. 135243789Sdim return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); 136243789Sdim } 137243789Sdim}; 138243789Sdim 139243789Sdimpublic: 140243789Sdim static char ID; 141243789Sdim StackColoring() : MachineFunctionPass(ID) { 142243789Sdim initializeStackColoringPass(*PassRegistry::getPassRegistry()); 143243789Sdim } 144243789Sdim void getAnalysisUsage(AnalysisUsage &AU) const; 145243789Sdim bool runOnMachineFunction(MachineFunction &MF); 146243789Sdim 147243789Sdimprivate: 148243789Sdim /// Debug. 149252723Sdim void dump() const; 150243789Sdim 151243789Sdim /// Removes all of the lifetime marker instructions from the function. 152243789Sdim /// \returns true if any markers were removed. 153243789Sdim bool removeAllMarkers(); 154243789Sdim 155243789Sdim /// Scan the machine function and find all of the lifetime markers. 156243789Sdim /// Record the findings in the BEGIN and END vectors. 157243789Sdim /// \returns the number of markers found. 158243789Sdim unsigned collectMarkers(unsigned NumSlot); 159243789Sdim 160243789Sdim /// Perform the dataflow calculation and calculate the lifetime for each of 161243789Sdim /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and 162243789Sdim /// LifetimeLIVE_OUT maps that represent which stack slots are live coming 163243789Sdim /// in and out blocks. 164243789Sdim void calculateLocalLiveness(); 165243789Sdim 166243789Sdim /// Construct the LiveIntervals for the slots. 167243789Sdim void calculateLiveIntervals(unsigned NumSlots); 168243789Sdim 169243789Sdim /// Go over the machine function and change instructions which use stack 170243789Sdim /// slots to use the joint slots. 171243789Sdim void remapInstructions(DenseMap<int, int> &SlotRemap); 172243789Sdim 173263509Sdim /// The input program may contain instructions which are not inside lifetime 174243789Sdim /// markers. This can happen due to a bug in the compiler or due to a bug in 175243789Sdim /// user code (for example, returning a reference to a local variable). 176243789Sdim /// This procedure checks all of the instructions in the function and 177243789Sdim /// invalidates lifetime ranges which do not contain all of the instructions 178243789Sdim /// which access that frame slot. 179243789Sdim void removeInvalidSlotRanges(); 180243789Sdim 181243789Sdim /// Map entries which point to other entries to their destination. 182243789Sdim /// A->B->C becomes A->C. 183243789Sdim void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); 184243789Sdim}; 185243789Sdim} // end anonymous namespace 186243789Sdim 187243789Sdimchar StackColoring::ID = 0; 188243789Sdimchar &llvm::StackColoringID = StackColoring::ID; 189243789Sdim 190243789SdimINITIALIZE_PASS_BEGIN(StackColoring, 191243789Sdim "stack-coloring", "Merge disjoint stack slots", false, false) 192243789SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 193243789SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes) 194243789SdimINITIALIZE_PASS_END(StackColoring, 195243789Sdim "stack-coloring", "Merge disjoint stack slots", false, false) 196243789Sdim 197243789Sdimvoid StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { 198243789Sdim AU.addRequired<MachineDominatorTree>(); 199243789Sdim AU.addPreserved<MachineDominatorTree>(); 200243789Sdim AU.addRequired<SlotIndexes>(); 201243789Sdim MachineFunctionPass::getAnalysisUsage(AU); 202243789Sdim} 203243789Sdim 204252723Sdimvoid StackColoring::dump() const { 205243789Sdim for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); 206243789Sdim FI != FE; ++FI) { 207252723Sdim DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<< 208252723Sdim " ["<<FI->getName()<<"]\n"); 209252723Sdim 210252723Sdim LivenessMap::const_iterator BI = BlockLiveness.find(*FI); 211252723Sdim assert(BI != BlockLiveness.end() && "Block not found"); 212252723Sdim const BlockLifetimeInfo &BlockInfo = BI->second; 213252723Sdim 214243789Sdim DEBUG(dbgs()<<"BEGIN : {"); 215252723Sdim for (unsigned i=0; i < BlockInfo.Begin.size(); ++i) 216252723Sdim DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" "); 217243789Sdim DEBUG(dbgs()<<"}\n"); 218243789Sdim 219243789Sdim DEBUG(dbgs()<<"END : {"); 220252723Sdim for (unsigned i=0; i < BlockInfo.End.size(); ++i) 221252723Sdim DEBUG(dbgs()<<BlockInfo.End.test(i)<<" "); 222243789Sdim 223243789Sdim DEBUG(dbgs()<<"}\n"); 224243789Sdim 225243789Sdim DEBUG(dbgs()<<"LIVE_IN: {"); 226252723Sdim for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i) 227252723Sdim DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" "); 228243789Sdim 229243789Sdim DEBUG(dbgs()<<"}\n"); 230243789Sdim DEBUG(dbgs()<<"LIVEOUT: {"); 231252723Sdim for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i) 232252723Sdim DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" "); 233243789Sdim DEBUG(dbgs()<<"}\n"); 234243789Sdim } 235243789Sdim} 236243789Sdim 237243789Sdimunsigned StackColoring::collectMarkers(unsigned NumSlot) { 238243789Sdim unsigned MarkersFound = 0; 239243789Sdim // Scan the function to find all lifetime markers. 240243789Sdim // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a 241243789Sdim // deterministic numbering, and because we'll need a post-order iteration 242243789Sdim // later for solving the liveness dataflow problem. 243243789Sdim for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); 244243789Sdim FI != FE; ++FI) { 245243789Sdim 246243789Sdim // Assign a serial number to this basic block. 247243789Sdim BasicBlocks[*FI] = BasicBlockNumbering.size(); 248243789Sdim BasicBlockNumbering.push_back(*FI); 249243789Sdim 250252723Sdim // Keep a reference to avoid repeated lookups. 251252723Sdim BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI]; 252243789Sdim 253252723Sdim BlockInfo.Begin.resize(NumSlot); 254252723Sdim BlockInfo.End.resize(NumSlot); 255252723Sdim 256243789Sdim for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); 257243789Sdim BI != BE; ++BI) { 258243789Sdim 259243789Sdim if (BI->getOpcode() != TargetOpcode::LIFETIME_START && 260243789Sdim BI->getOpcode() != TargetOpcode::LIFETIME_END) 261243789Sdim continue; 262243789Sdim 263243789Sdim Markers.push_back(BI); 264243789Sdim 265243789Sdim bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; 266252723Sdim const MachineOperand &MI = BI->getOperand(0); 267243789Sdim unsigned Slot = MI.getIndex(); 268243789Sdim 269243789Sdim MarkersFound++; 270243789Sdim 271243789Sdim const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); 272243789Sdim if (Allocation) { 273243789Sdim DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< 274243789Sdim " with allocation: "<< Allocation->getName()<<"\n"); 275243789Sdim } 276243789Sdim 277243789Sdim if (IsStart) { 278252723Sdim BlockInfo.Begin.set(Slot); 279243789Sdim } else { 280252723Sdim if (BlockInfo.Begin.test(Slot)) { 281243789Sdim // Allocas that start and end within a single block are handled 282243789Sdim // specially when computing the LiveIntervals to avoid pessimizing 283243789Sdim // the liveness propagation. 284252723Sdim BlockInfo.Begin.reset(Slot); 285243789Sdim } else { 286252723Sdim BlockInfo.End.set(Slot); 287243789Sdim } 288243789Sdim } 289243789Sdim } 290243789Sdim } 291243789Sdim 292243789Sdim // Update statistics. 293243789Sdim NumMarkerSeen += MarkersFound; 294243789Sdim return MarkersFound; 295243789Sdim} 296243789Sdim 297243789Sdimvoid StackColoring::calculateLocalLiveness() { 298243789Sdim // Perform a standard reverse dataflow computation to solve for 299243789Sdim // global liveness. The BEGIN set here is equivalent to KILL in the standard 300243789Sdim // formulation, and END is equivalent to GEN. The result of this computation 301243789Sdim // is a map from blocks to bitvectors where the bitvectors represent which 302243789Sdim // allocas are live in/out of that block. 303252723Sdim SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), 304252723Sdim BasicBlockNumbering.end()); 305243789Sdim unsigned NumSSMIters = 0; 306243789Sdim bool changed = true; 307243789Sdim while (changed) { 308243789Sdim changed = false; 309243789Sdim ++NumSSMIters; 310243789Sdim 311252723Sdim SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; 312243789Sdim 313263509Sdim for (SmallVectorImpl<const MachineBasicBlock *>::iterator 314263509Sdim PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); 315263509Sdim PI != PE; ++PI) { 316243789Sdim 317252723Sdim const MachineBasicBlock *BB = *PI; 318243789Sdim if (!BBSet.count(BB)) continue; 319243789Sdim 320252723Sdim // Use an iterator to avoid repeated lookups. 321252723Sdim LivenessMap::iterator BI = BlockLiveness.find(BB); 322252723Sdim assert(BI != BlockLiveness.end() && "Block not found"); 323252723Sdim BlockLifetimeInfo &BlockInfo = BI->second; 324252723Sdim 325243789Sdim BitVector LocalLiveIn; 326243789Sdim BitVector LocalLiveOut; 327243789Sdim 328243789Sdim // Forward propagation from begins to ends. 329252723Sdim for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), 330252723Sdim PE = BB->pred_end(); PI != PE; ++PI) { 331252723Sdim LivenessMap::const_iterator I = BlockLiveness.find(*PI); 332252723Sdim assert(I != BlockLiveness.end() && "Predecessor not found"); 333252723Sdim LocalLiveIn |= I->second.LiveOut; 334252723Sdim } 335252723Sdim LocalLiveIn |= BlockInfo.End; 336252723Sdim LocalLiveIn.reset(BlockInfo.Begin); 337243789Sdim 338243789Sdim // Reverse propagation from ends to begins. 339252723Sdim for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), 340252723Sdim SE = BB->succ_end(); SI != SE; ++SI) { 341252723Sdim LivenessMap::const_iterator I = BlockLiveness.find(*SI); 342252723Sdim assert(I != BlockLiveness.end() && "Successor not found"); 343252723Sdim LocalLiveOut |= I->second.LiveIn; 344252723Sdim } 345252723Sdim LocalLiveOut |= BlockInfo.Begin; 346252723Sdim LocalLiveOut.reset(BlockInfo.End); 347243789Sdim 348243789Sdim LocalLiveIn |= LocalLiveOut; 349243789Sdim LocalLiveOut |= LocalLiveIn; 350243789Sdim 351243789Sdim // After adopting the live bits, we need to turn-off the bits which 352243789Sdim // are de-activated in this block. 353252723Sdim LocalLiveOut.reset(BlockInfo.End); 354252723Sdim LocalLiveIn.reset(BlockInfo.Begin); 355243789Sdim 356243789Sdim // If we have both BEGIN and END markers in the same basic block then 357243789Sdim // we know that the BEGIN marker comes after the END, because we already 358243789Sdim // handle the case where the BEGIN comes before the END when collecting 359243789Sdim // the markers (and building the BEGIN/END vectore). 360243789Sdim // Want to enable the LIVE_IN and LIVE_OUT of slots that have both 361243789Sdim // BEGIN and END because it means that the value lives before and after 362243789Sdim // this basic block. 363252723Sdim BitVector LocalEndBegin = BlockInfo.End; 364252723Sdim LocalEndBegin &= BlockInfo.Begin; 365243789Sdim LocalLiveIn |= LocalEndBegin; 366243789Sdim LocalLiveOut |= LocalEndBegin; 367243789Sdim 368252723Sdim if (LocalLiveIn.test(BlockInfo.LiveIn)) { 369243789Sdim changed = true; 370252723Sdim BlockInfo.LiveIn |= LocalLiveIn; 371243789Sdim 372252723Sdim for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), 373243789Sdim PE = BB->pred_end(); PI != PE; ++PI) 374243789Sdim NextBBSet.insert(*PI); 375243789Sdim } 376243789Sdim 377252723Sdim if (LocalLiveOut.test(BlockInfo.LiveOut)) { 378243789Sdim changed = true; 379252723Sdim BlockInfo.LiveOut |= LocalLiveOut; 380243789Sdim 381252723Sdim for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), 382243789Sdim SE = BB->succ_end(); SI != SE; ++SI) 383243789Sdim NextBBSet.insert(*SI); 384243789Sdim } 385243789Sdim } 386243789Sdim 387243789Sdim BBSet = NextBBSet; 388243789Sdim }// while changed. 389243789Sdim} 390243789Sdim 391243789Sdimvoid StackColoring::calculateLiveIntervals(unsigned NumSlots) { 392243789Sdim SmallVector<SlotIndex, 16> Starts; 393243789Sdim SmallVector<SlotIndex, 16> Finishes; 394243789Sdim 395243789Sdim // For each block, find which slots are active within this block 396243789Sdim // and update the live intervals. 397243789Sdim for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); 398243789Sdim MBB != MBBe; ++MBB) { 399243789Sdim Starts.clear(); 400243789Sdim Starts.resize(NumSlots); 401243789Sdim Finishes.clear(); 402243789Sdim Finishes.resize(NumSlots); 403243789Sdim 404243789Sdim // Create the interval for the basic blocks with lifetime markers in them. 405252723Sdim for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(), 406243789Sdim e = Markers.end(); it != e; ++it) { 407252723Sdim const MachineInstr *MI = *it; 408243789Sdim if (MI->getParent() != MBB) 409243789Sdim continue; 410243789Sdim 411243789Sdim assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || 412243789Sdim MI->getOpcode() == TargetOpcode::LIFETIME_END) && 413243789Sdim "Invalid Lifetime marker"); 414243789Sdim 415243789Sdim bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; 416252723Sdim const MachineOperand &Mo = MI->getOperand(0); 417243789Sdim int Slot = Mo.getIndex(); 418243789Sdim assert(Slot >= 0 && "Invalid slot"); 419243789Sdim 420243789Sdim SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); 421243789Sdim 422243789Sdim if (IsStart) { 423243789Sdim if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) 424243789Sdim Starts[Slot] = ThisIndex; 425243789Sdim } else { 426243789Sdim if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) 427243789Sdim Finishes[Slot] = ThisIndex; 428243789Sdim } 429243789Sdim } 430243789Sdim 431243789Sdim // Create the interval of the blocks that we previously found to be 'alive'. 432263509Sdim BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; 433263509Sdim for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; 434263509Sdim pos = MBBLiveness.LiveIn.find_next(pos)) { 435263509Sdim Starts[pos] = Indexes->getMBBStartIdx(MBB); 436243789Sdim } 437263509Sdim for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; 438263509Sdim pos = MBBLiveness.LiveOut.find_next(pos)) { 439263509Sdim Finishes[pos] = Indexes->getMBBEndIdx(MBB); 440263509Sdim } 441243789Sdim 442243789Sdim for (unsigned i = 0; i < NumSlots; ++i) { 443243789Sdim assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); 444243789Sdim if (!Starts[i].isValid()) 445243789Sdim continue; 446243789Sdim 447243789Sdim assert(Starts[i] && Finishes[i] && "Invalid interval"); 448243789Sdim VNInfo *ValNum = Intervals[i]->getValNumInfo(0); 449243789Sdim SlotIndex S = Starts[i]; 450243789Sdim SlotIndex F = Finishes[i]; 451243789Sdim if (S < F) { 452243789Sdim // We have a single consecutive region. 453263509Sdim Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); 454243789Sdim } else { 455243789Sdim // We have two non consecutive regions. This happens when 456243789Sdim // LIFETIME_START appears after the LIFETIME_END marker. 457243789Sdim SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); 458243789Sdim SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); 459263509Sdim Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); 460263509Sdim Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); 461243789Sdim } 462243789Sdim } 463243789Sdim } 464243789Sdim} 465243789Sdim 466243789Sdimbool StackColoring::removeAllMarkers() { 467243789Sdim unsigned Count = 0; 468243789Sdim for (unsigned i = 0; i < Markers.size(); ++i) { 469243789Sdim Markers[i]->eraseFromParent(); 470243789Sdim Count++; 471243789Sdim } 472243789Sdim Markers.clear(); 473243789Sdim 474243789Sdim DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); 475243789Sdim return Count; 476243789Sdim} 477243789Sdim 478243789Sdimvoid StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { 479243789Sdim unsigned FixedInstr = 0; 480243789Sdim unsigned FixedMemOp = 0; 481243789Sdim unsigned FixedDbg = 0; 482243789Sdim MachineModuleInfo *MMI = &MF->getMMI(); 483243789Sdim 484243789Sdim // Remap debug information that refers to stack slots. 485243789Sdim MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); 486243789Sdim for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), 487243789Sdim VE = VMap.end(); VI != VE; ++VI) { 488243789Sdim const MDNode *Var = VI->first; 489243789Sdim if (!Var) continue; 490243789Sdim std::pair<unsigned, DebugLoc> &VP = VI->second; 491243789Sdim if (SlotRemap.count(VP.first)) { 492243789Sdim DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n"); 493243789Sdim VP.first = SlotRemap[VP.first]; 494243789Sdim FixedDbg++; 495243789Sdim } 496243789Sdim } 497243789Sdim 498243789Sdim // Keep a list of *allocas* which need to be remapped. 499243789Sdim DenseMap<const AllocaInst*, const AllocaInst*> Allocas; 500252723Sdim for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(), 501243789Sdim e = SlotRemap.end(); it != e; ++it) { 502243789Sdim const AllocaInst *From = MFI->getObjectAllocation(it->first); 503243789Sdim const AllocaInst *To = MFI->getObjectAllocation(it->second); 504243789Sdim assert(To && From && "Invalid allocation object"); 505243789Sdim Allocas[From] = To; 506243789Sdim } 507243789Sdim 508243789Sdim // Remap all instructions to the new stack slots. 509243789Sdim MachineFunction::iterator BB, BBE; 510243789Sdim MachineBasicBlock::iterator I, IE; 511243789Sdim for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) 512243789Sdim for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { 513243789Sdim 514243789Sdim // Skip lifetime markers. We'll remove them soon. 515243789Sdim if (I->getOpcode() == TargetOpcode::LIFETIME_START || 516243789Sdim I->getOpcode() == TargetOpcode::LIFETIME_END) 517243789Sdim continue; 518243789Sdim 519243789Sdim // Update the MachineMemOperand to use the new alloca. 520243789Sdim for (MachineInstr::mmo_iterator MM = I->memoperands_begin(), 521243789Sdim E = I->memoperands_end(); MM != E; ++MM) { 522243789Sdim MachineMemOperand *MMO = *MM; 523243789Sdim 524243789Sdim const Value *V = MMO->getValue(); 525243789Sdim 526243789Sdim if (!V) 527243789Sdim continue; 528243789Sdim 529263509Sdim const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); 530263509Sdim if (PSV && PSV->isConstant(MFI)) 531263509Sdim continue; 532263509Sdim 533243789Sdim // Climb up and find the original alloca. 534243789Sdim V = GetUnderlyingObject(V); 535243789Sdim // If we did not find one, or if the one that we found is not in our 536243789Sdim // map, then move on. 537243789Sdim if (!V || !isa<AllocaInst>(V)) { 538243789Sdim // Clear mem operand since we don't know for sure that it doesn't 539243789Sdim // alias a merged alloca. 540243789Sdim MMO->setValue(0); 541243789Sdim continue; 542243789Sdim } 543243789Sdim const AllocaInst *AI= cast<AllocaInst>(V); 544243789Sdim if (!Allocas.count(AI)) 545243789Sdim continue; 546243789Sdim 547243789Sdim MMO->setValue(Allocas[AI]); 548243789Sdim FixedMemOp++; 549243789Sdim } 550243789Sdim 551243789Sdim // Update all of the machine instruction operands. 552243789Sdim for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { 553243789Sdim MachineOperand &MO = I->getOperand(i); 554243789Sdim 555243789Sdim if (!MO.isFI()) 556243789Sdim continue; 557243789Sdim int FromSlot = MO.getIndex(); 558243789Sdim 559243789Sdim // Don't touch arguments. 560243789Sdim if (FromSlot<0) 561243789Sdim continue; 562243789Sdim 563243789Sdim // Only look at mapped slots. 564243789Sdim if (!SlotRemap.count(FromSlot)) 565243789Sdim continue; 566243789Sdim 567243789Sdim // In a debug build, check that the instruction that we are modifying is 568243789Sdim // inside the expected live range. If the instruction is not inside 569243789Sdim // the calculated range then it means that the alloca usage moved 570243789Sdim // outside of the lifetime markers, or that the user has a bug. 571243789Sdim // NOTE: Alloca address calculations which happen outside the lifetime 572243789Sdim // zone are are okay, despite the fact that we don't have a good way 573243789Sdim // for validating all of the usages of the calculation. 574243789Sdim#ifndef NDEBUG 575243789Sdim bool TouchesMemory = I->mayLoad() || I->mayStore(); 576243789Sdim // If we *don't* protect the user from escaped allocas, don't bother 577243789Sdim // validating the instructions. 578243789Sdim if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { 579243789Sdim SlotIndex Index = Indexes->getInstructionIndex(I); 580243789Sdim LiveInterval *Interval = Intervals[FromSlot]; 581243789Sdim assert(Interval->find(Index) != Interval->end() && 582252723Sdim "Found instruction usage outside of live range."); 583243789Sdim } 584243789Sdim#endif 585243789Sdim 586243789Sdim // Fix the machine instructions. 587243789Sdim int ToSlot = SlotRemap[FromSlot]; 588243789Sdim MO.setIndex(ToSlot); 589243789Sdim FixedInstr++; 590243789Sdim } 591243789Sdim } 592243789Sdim 593243789Sdim DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); 594243789Sdim DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); 595243789Sdim DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); 596243789Sdim} 597243789Sdim 598243789Sdimvoid StackColoring::removeInvalidSlotRanges() { 599252723Sdim MachineFunction::const_iterator BB, BBE; 600252723Sdim MachineBasicBlock::const_iterator I, IE; 601243789Sdim for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) 602243789Sdim for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { 603243789Sdim 604243789Sdim if (I->getOpcode() == TargetOpcode::LIFETIME_START || 605243789Sdim I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue()) 606243789Sdim continue; 607243789Sdim 608243789Sdim // Some intervals are suspicious! In some cases we find address 609243789Sdim // calculations outside of the lifetime zone, but not actual memory 610243789Sdim // read or write. Memory accesses outside of the lifetime zone are a clear 611243789Sdim // violation, but address calculations are okay. This can happen when 612243789Sdim // GEPs are hoisted outside of the lifetime zone. 613243789Sdim // So, in here we only check instructions which can read or write memory. 614243789Sdim if (!I->mayLoad() && !I->mayStore()) 615243789Sdim continue; 616243789Sdim 617243789Sdim // Check all of the machine operands. 618243789Sdim for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { 619252723Sdim const MachineOperand &MO = I->getOperand(i); 620243789Sdim 621243789Sdim if (!MO.isFI()) 622243789Sdim continue; 623243789Sdim 624243789Sdim int Slot = MO.getIndex(); 625243789Sdim 626243789Sdim if (Slot<0) 627243789Sdim continue; 628243789Sdim 629243789Sdim if (Intervals[Slot]->empty()) 630243789Sdim continue; 631243789Sdim 632243789Sdim // Check that the used slot is inside the calculated lifetime range. 633243789Sdim // If it is not, warn about it and invalidate the range. 634243789Sdim LiveInterval *Interval = Intervals[Slot]; 635243789Sdim SlotIndex Index = Indexes->getInstructionIndex(I); 636243789Sdim if (Interval->find(Index) == Interval->end()) { 637243789Sdim Intervals[Slot]->clear(); 638243789Sdim DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); 639243789Sdim EscapedAllocas++; 640243789Sdim } 641243789Sdim } 642243789Sdim } 643243789Sdim} 644243789Sdim 645243789Sdimvoid StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, 646243789Sdim unsigned NumSlots) { 647243789Sdim // Expunge slot remap map. 648243789Sdim for (unsigned i=0; i < NumSlots; ++i) { 649243789Sdim // If we are remapping i 650243789Sdim if (SlotRemap.count(i)) { 651243789Sdim int Target = SlotRemap[i]; 652243789Sdim // As long as our target is mapped to something else, follow it. 653243789Sdim while (SlotRemap.count(Target)) { 654243789Sdim Target = SlotRemap[Target]; 655243789Sdim SlotRemap[i] = Target; 656243789Sdim } 657243789Sdim } 658243789Sdim } 659243789Sdim} 660243789Sdim 661243789Sdimbool StackColoring::runOnMachineFunction(MachineFunction &Func) { 662243789Sdim DEBUG(dbgs() << "********** Stack Coloring **********\n" 663243789Sdim << "********** Function: " 664243789Sdim << ((const Value*)Func.getFunction())->getName() << '\n'); 665243789Sdim MF = &Func; 666243789Sdim MFI = MF->getFrameInfo(); 667243789Sdim Indexes = &getAnalysis<SlotIndexes>(); 668243789Sdim BlockLiveness.clear(); 669243789Sdim BasicBlocks.clear(); 670243789Sdim BasicBlockNumbering.clear(); 671243789Sdim Markers.clear(); 672243789Sdim Intervals.clear(); 673243789Sdim VNInfoAllocator.Reset(); 674243789Sdim 675243789Sdim unsigned NumSlots = MFI->getObjectIndexEnd(); 676243789Sdim 677243789Sdim // If there are no stack slots then there are no markers to remove. 678243789Sdim if (!NumSlots) 679243789Sdim return false; 680243789Sdim 681243789Sdim SmallVector<int, 8> SortedSlots; 682243789Sdim 683243789Sdim SortedSlots.reserve(NumSlots); 684243789Sdim Intervals.reserve(NumSlots); 685243789Sdim 686243789Sdim unsigned NumMarkers = collectMarkers(NumSlots); 687243789Sdim 688243789Sdim unsigned TotalSize = 0; 689243789Sdim DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); 690243789Sdim DEBUG(dbgs()<<"Slot structure:\n"); 691243789Sdim 692243789Sdim for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { 693243789Sdim DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); 694243789Sdim TotalSize += MFI->getObjectSize(i); 695243789Sdim } 696243789Sdim 697243789Sdim DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); 698243789Sdim 699243789Sdim // Don't continue because there are not enough lifetime markers, or the 700243789Sdim // stack is too small, or we are told not to optimize the slots. 701243789Sdim if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { 702243789Sdim DEBUG(dbgs()<<"Will not try to merge slots.\n"); 703243789Sdim return removeAllMarkers(); 704243789Sdim } 705243789Sdim 706243789Sdim for (unsigned i=0; i < NumSlots; ++i) { 707243789Sdim LiveInterval *LI = new LiveInterval(i, 0); 708243789Sdim Intervals.push_back(LI); 709243789Sdim LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); 710243789Sdim SortedSlots.push_back(i); 711243789Sdim } 712243789Sdim 713243789Sdim // Calculate the liveness of each block. 714243789Sdim calculateLocalLiveness(); 715243789Sdim 716243789Sdim // Propagate the liveness information. 717243789Sdim calculateLiveIntervals(NumSlots); 718243789Sdim 719243789Sdim // Search for allocas which are used outside of the declared lifetime 720243789Sdim // markers. 721243789Sdim if (ProtectFromEscapedAllocas) 722243789Sdim removeInvalidSlotRanges(); 723243789Sdim 724243789Sdim // Maps old slots to new slots. 725243789Sdim DenseMap<int, int> SlotRemap; 726243789Sdim unsigned RemovedSlots = 0; 727243789Sdim unsigned ReducedSize = 0; 728243789Sdim 729243789Sdim // Do not bother looking at empty intervals. 730243789Sdim for (unsigned I = 0; I < NumSlots; ++I) { 731243789Sdim if (Intervals[SortedSlots[I]]->empty()) 732243789Sdim SortedSlots[I] = -1; 733243789Sdim } 734243789Sdim 735243789Sdim // This is a simple greedy algorithm for merging allocas. First, sort the 736243789Sdim // slots, placing the largest slots first. Next, perform an n^2 scan and look 737243789Sdim // for disjoint slots. When you find disjoint slots, merge the samller one 738243789Sdim // into the bigger one and update the live interval. Remove the small alloca 739243789Sdim // and continue. 740243789Sdim 741243789Sdim // Sort the slots according to their size. Place unused slots at the end. 742252723Sdim // Use stable sort to guarantee deterministic code generation. 743252723Sdim std::stable_sort(SortedSlots.begin(), SortedSlots.end(), 744252723Sdim SlotSizeSorter(MFI)); 745243789Sdim 746252723Sdim bool Changed = true; 747252723Sdim while (Changed) { 748252723Sdim Changed = false; 749243789Sdim for (unsigned I = 0; I < NumSlots; ++I) { 750243789Sdim if (SortedSlots[I] == -1) 751243789Sdim continue; 752243789Sdim 753243789Sdim for (unsigned J=I+1; J < NumSlots; ++J) { 754243789Sdim if (SortedSlots[J] == -1) 755243789Sdim continue; 756243789Sdim 757243789Sdim int FirstSlot = SortedSlots[I]; 758243789Sdim int SecondSlot = SortedSlots[J]; 759243789Sdim LiveInterval *First = Intervals[FirstSlot]; 760243789Sdim LiveInterval *Second = Intervals[SecondSlot]; 761243789Sdim assert (!First->empty() && !Second->empty() && "Found an empty range"); 762243789Sdim 763243789Sdim // Merge disjoint slots. 764243789Sdim if (!First->overlaps(*Second)) { 765252723Sdim Changed = true; 766263509Sdim First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); 767243789Sdim SlotRemap[SecondSlot] = FirstSlot; 768243789Sdim SortedSlots[J] = -1; 769243789Sdim DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< 770243789Sdim SecondSlot<<" together.\n"); 771243789Sdim unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), 772243789Sdim MFI->getObjectAlignment(SecondSlot)); 773243789Sdim 774243789Sdim assert(MFI->getObjectSize(FirstSlot) >= 775243789Sdim MFI->getObjectSize(SecondSlot) && 776243789Sdim "Merging a small object into a larger one"); 777243789Sdim 778243789Sdim RemovedSlots+=1; 779243789Sdim ReducedSize += MFI->getObjectSize(SecondSlot); 780243789Sdim MFI->setObjectAlignment(FirstSlot, MaxAlignment); 781243789Sdim MFI->RemoveStackObject(SecondSlot); 782243789Sdim } 783243789Sdim } 784243789Sdim } 785243789Sdim }// While changed. 786243789Sdim 787243789Sdim // Record statistics. 788243789Sdim StackSpaceSaved += ReducedSize; 789243789Sdim StackSlotMerged += RemovedSlots; 790243789Sdim DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< 791243789Sdim ReducedSize<<" bytes\n"); 792243789Sdim 793243789Sdim // Scan the entire function and update all machine operands that use frame 794243789Sdim // indices to use the remapped frame index. 795243789Sdim expungeSlotMap(SlotRemap, NumSlots); 796243789Sdim remapInstructions(SlotRemap); 797243789Sdim 798243789Sdim // Release the intervals. 799243789Sdim for (unsigned I = 0; I < NumSlots; ++I) { 800243789Sdim delete Intervals[I]; 801243789Sdim } 802243789Sdim 803243789Sdim return removeAllMarkers(); 804243789Sdim} 805