// DebugIR.cpp revision 263508
//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// A Module transform pass that emits a succinct version of the IR and replaces
// the source file metadata to allow debuggers to step through the IR.
//
// FIXME: instead of replacing debug metadata, this pass should allow for
// additional metadata to be used to point capable debuggers to the IR file
// without destroying the mapping to the original source file.
//
//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "debug-ir"
20
21#include "llvm/ADT/ValueMap.h"
22#include "llvm/Assembly/AssemblyAnnotationWriter.h"
23#include "llvm/DebugInfo.h"
24#include "llvm/DIBuilder.h"
25#include "llvm/InstVisitor.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/IR/Module.h"
30#include "llvm/Transforms/Instrumentation.h"
31#include "llvm/Transforms/Utils/Cloning.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ToolOutputFile.h"
34#include "llvm/Support/FormattedStream.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37
38#include "DebugIR.h"
39
40#include <string>
41
42#define STR_HELPER(x) #x
43#define STR(x) STR_HELPER(x)
44
45using namespace llvm;
46
47namespace {
48
49/// Builds a map of Value* to line numbers on which the Value appears in a
50/// textual representation of the IR by plugging into the AssemblyWriter by
51/// masquerading as an AssemblyAnnotationWriter.
52class ValueToLineMap : public AssemblyAnnotationWriter {
53  ValueMap<const Value *, unsigned int> Lines;
54  typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter;
55
56  void addEntry(const Value *V, formatted_raw_ostream &Out) {
57    Out.flush();
58    Lines.insert(std::make_pair(V, Out.getLine() + 1));
59  }
60
61public:
62
63  /// Prints Module to a null buffer in order to build the map of Value pointers
64  /// to line numbers.
65  ValueToLineMap(const Module *M) {
66    raw_null_ostream ThrowAway;
67    M->print(ThrowAway, this);
68  }
69
70  // This function is called after an Instruction, GlobalValue, or GlobalAlias
71  // is printed.
72  void printInfoComment(const Value &V, formatted_raw_ostream &Out) {
73    addEntry(&V, Out);
74  }
75
76  void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) {
77    addEntry(F, Out);
78  }
79
80  /// If V appears on a line in the textual IR representation, sets Line to the
81  /// line number and returns true, otherwise returns false.
82  bool getLine(const Value *V, unsigned int &Line) const {
83    LineIter i = Lines.find(V);
84    if (i != Lines.end()) {
85      Line = i->second;
86      return true;
87    }
88    return false;
89  }
90};
91
92/// Removes debug intrisncs like llvm.dbg.declare and llvm.dbg.value.
93class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> {
94  void remove(Instruction &I) { I.eraseFromParent(); }
95
96public:
97  static void process(Module &M) {
98    DebugIntrinsicsRemover Remover;
99    Remover.visit(&M);
100  }
101  void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); }
102  void visitDbgValueInst(DbgValueInst &I) { remove(I); }
103  void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); }
104};
105
106/// Removes debug metadata (!dbg) nodes from all instructions, and optionally
107/// metadata named "llvm.dbg.cu" if RemoveNamedInfo is true.
108class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> {
109  bool RemoveNamedInfo;
110
111public:
112  static void process(Module &M, bool RemoveNamedInfo = true) {
113    DebugMetadataRemover Remover(RemoveNamedInfo);
114    Remover.run(&M);
115  }
116
117  DebugMetadataRemover(bool RemoveNamedInfo)
118      : RemoveNamedInfo(RemoveNamedInfo) {}
119
120  void visitInstruction(Instruction &I) {
121    if (I.getMetadata(LLVMContext::MD_dbg))
122      I.setMetadata(LLVMContext::MD_dbg, 0);
123  }
124
125  void run(Module *M) {
126    // Remove debug metadata attached to instructions
127    visit(M);
128
129    if (RemoveNamedInfo) {
130      // Remove CU named metadata (and all children nodes)
131      NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu");
132      if (Node)
133        M->eraseNamedMetadata(Node);
134    }
135  }
136};
137
138/// Updates debug metadata in a Module:
139///   - changes Filename/Directory to values provided on construction
140///   - adds/updates line number (DebugLoc) entries associated with each
141///     instruction to reflect the instruction's location in an LLVM IR file
142class DIUpdater : public InstVisitor<DIUpdater> {
143  /// Builder of debug information
144  DIBuilder Builder;
145
146  /// Helper for type attributes/sizes/etc
147  DataLayout Layout;
148
149  /// Map of Value* to line numbers
150  const ValueToLineMap LineTable;
151
152  /// Map of Value* (in original Module) to Value* (in optional cloned Module)
153  const ValueToValueMapTy *VMap;
154
155  /// Directory of debug metadata
156  DebugInfoFinder Finder;
157
158  /// Source filename and directory
159  StringRef Filename;
160  StringRef Directory;
161
162  // CU nodes needed when creating DI subprograms
163  MDNode *FileNode;
164  MDNode *LexicalBlockFileNode;
165  const MDNode *CUNode;
166
167  ValueMap<const Function *, MDNode *> SubprogramDescriptors;
168  DenseMap<const Type *, MDNode *> TypeDescriptors;
169
170public:
171  DIUpdater(Module &M, StringRef Filename = StringRef(),
172            StringRef Directory = StringRef(), const Module *DisplayM = 0,
173            const ValueToValueMapTy *VMap = 0)
174      : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
175        Finder(), Filename(Filename), Directory(Directory), FileNode(0),
176        LexicalBlockFileNode(0), CUNode(0) {
177    Finder.processModule(M);
178    visit(&M);
179  }
180
181  ~DIUpdater() { Builder.finalize(); }
182
183  void visitModule(Module &M) {
184    if (Finder.compile_unit_count() > 1)
185      report_fatal_error("DebugIR pass supports only a signle compile unit per "
186                         "Module.");
187    createCompileUnit(
188        Finder.compile_unit_count() == 1 ? *Finder.compile_unit_begin() : 0);
189  }
190
191  void visitFunction(Function &F) {
192    if (F.isDeclaration() || findDISubprogram(&F))
193      return;
194
195    StringRef MangledName = F.getName();
196    DICompositeType Sig = createFunctionSignature(&F);
197
198    // find line of function declaration
199    unsigned Line = 0;
200    if (!findLine(&F, Line)) {
201      DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str()
202                   << "\n");
203      return;
204    }
205
206    Instruction *FirstInst = F.begin()->begin();
207    unsigned ScopeLine = 0;
208    if (!findLine(FirstInst, ScopeLine)) {
209      DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function "
210                   << F.getName().str() << "\n");
211      return;
212    }
213
214    bool Local = F.hasInternalLinkage();
215    bool IsDefinition = !F.isDeclaration();
216    bool IsOptimized = false;
217
218    int FuncFlags = llvm::DIDescriptor::FlagPrototyped;
219    assert(CUNode && FileNode);
220    DISubprogram Sub = Builder.createFunction(
221        DICompileUnit(CUNode), F.getName(), MangledName, DIFile(FileNode), Line,
222        Sig, Local, IsDefinition, ScopeLine, FuncFlags, IsOptimized, &F);
223    assert(Sub.isSubprogram());
224    DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": "
225                 << "\n");
226
227    SubprogramDescriptors.insert(std::make_pair(&F, Sub));
228  }
229
230  void visitInstruction(Instruction &I) {
231    DebugLoc Loc(I.getDebugLoc());
232
233    /// If a ValueToValueMap is provided, use it to get the real instruction as
234    /// the line table was generated on a clone of the module on which we are
235    /// operating.
236    Value *RealInst = 0;
237    if (VMap)
238      RealInst = VMap->lookup(&I);
239
240    if (!RealInst)
241      RealInst = &I;
242
243    unsigned Col = 0; // FIXME: support columns
244    unsigned Line;
245    if (!LineTable.getLine(RealInst, Line)) {
246      // Instruction has no line, it may have been removed (in the module that
247      // will be passed to the debugger) so there is nothing to do here.
248      DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst
249                   << "\n");
250      DEBUG(RealInst->dump());
251      return;
252    }
253
254    DebugLoc NewLoc;
255    if (!Loc.isUnknown())
256      // I had a previous debug location: re-use the DebugLoc
257      NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
258                             Loc.getInlinedAt(RealInst->getContext()));
259    else if (MDNode *scope = findScope(&I))
260      NewLoc = DebugLoc::get(Line, Col, scope, 0);
261    else {
262      DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
263                   << ". no DebugLoc will be present."
264                   << "\n");
265      return;
266    }
267
268    addDebugLocation(I, NewLoc);
269  }
270
271private:
272
273  void createCompileUnit(MDNode *CUToReplace) {
274    std::string Flags;
275    bool IsOptimized = false;
276    StringRef Producer;
277    unsigned RuntimeVersion(0);
278    StringRef SplitName;
279
280    if (CUToReplace) {
281      // save fields from existing CU to re-use in the new CU
282      DICompileUnit ExistingCU(CUToReplace);
283      Producer = ExistingCU.getProducer();
284      IsOptimized = ExistingCU.isOptimized();
285      Flags = ExistingCU.getFlags();
286      RuntimeVersion = ExistingCU.getRunTimeVersion();
287      SplitName = ExistingCU.getSplitDebugFilename();
288    } else {
289      Producer =
290          "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR);
291    }
292
293    CUNode =
294        Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory,
295                                  Producer, IsOptimized, Flags, RuntimeVersion);
296
297    if (CUToReplace)
298      CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode));
299
300    DICompileUnit CU(CUNode);
301    FileNode = Builder.createFile(Filename, Directory);
302    LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode));
303  }
304
305  /// Returns the MDNode* that represents the DI scope to associate with I
306  MDNode *findScope(const Instruction *I) {
307    const Function *F = I->getParent()->getParent();
308    if (MDNode *ret = findDISubprogram(F))
309      return ret;
310
311    DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope "
312                 << LexicalBlockFileNode << " as scope for instruction " << I
313                 << "\n");
314    return LexicalBlockFileNode;
315  }
316
317  /// Returns the MDNode* that is the descriptor for F
318  MDNode *findDISubprogram(const Function *F) {
319    typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter;
320    FuncNodeIter i = SubprogramDescriptors.find(F);
321    if (i != SubprogramDescriptors.end())
322      return i->second;
323
324    DEBUG(dbgs() << "searching for DI scope node for Function " << F
325                 << " in a list of " << Finder.subprogram_count()
326                 << " subprogram nodes"
327                 << "\n");
328
329    for (DebugInfoFinder::iterator i = Finder.subprogram_begin(),
330                                   e = Finder.subprogram_end();
331         i != e; ++i) {
332      DISubprogram S(*i);
333      if (S.getFunction() == F) {
334        DEBUG(dbgs() << "Found DISubprogram " << *i << " for function "
335                     << S.getFunction() << "\n");
336        return *i;
337      }
338    }
339    DEBUG(dbgs() << "unable to find DISubprogram node for function "
340                 << F->getName().str() << "\n");
341    return 0;
342  }
343
344  /// Sets Line to the line number on which V appears and returns true. If a
345  /// line location for V is not found, returns false.
346  bool findLine(const Value *V, unsigned &Line) {
347    if (LineTable.getLine(V, Line))
348      return true;
349
350    if (VMap) {
351      Value *mapped = VMap->lookup(V);
352      if (mapped && LineTable.getLine(mapped, Line))
353        return true;
354    }
355    return false;
356  }
357
358  std::string getTypeName(Type *T) {
359    std::string TypeName;
360    raw_string_ostream TypeStream(TypeName);
361    T->print(TypeStream);
362    TypeStream.flush();
363    return TypeName;
364  }
365
366  /// Returns the MDNode that represents type T if it is already created, or 0
367  /// if it is not.
368  MDNode *getType(const Type *T) {
369    typedef DenseMap<const Type *, MDNode *>::const_iterator TypeNodeIter;
370    TypeNodeIter i = TypeDescriptors.find(T);
371    if (i != TypeDescriptors.end())
372      return i->second;
373    return 0;
374  }
375
376  /// Returns a DebugInfo type from an LLVM type T.
377  DIDerivedType getOrCreateType(Type *T) {
378    MDNode *N = getType(T);
379    if (N)
380      return DIDerivedType(N);
381    else if (T->isVoidTy())
382      return DIDerivedType(0);
383    else if (T->isStructTy()) {
384      N = Builder.createStructType(
385          DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
386          0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
387          DIType(0), DIArray(0)); // filled in later
388
389      // N is added to the map (early) so that element search below can find it,
390      // so as to avoid infinite recursion for structs that contain pointers to
391      // their own type.
392      TypeDescriptors[T] = N;
393      DICompositeType StructDescriptor(N);
394
395      SmallVector<Value *, 4> Elements;
396      for (unsigned i = 0; i < T->getStructNumElements(); ++i)
397        Elements.push_back(getOrCreateType(T->getStructElementType(i)));
398
399      // set struct elements
400      StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements));
401    } else if (T->isPointerTy()) {
402      Type *PointeeTy = T->getPointerElementType();
403      if (!(N = getType(PointeeTy)))
404        N = Builder.createPointerType(
405            getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
406            Layout.getPrefTypeAlignment(T), getTypeName(T));
407    } else if (T->isArrayTy()) {
408      SmallVector<Value *, 1> Subrange;
409      Subrange.push_back(
410          Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1));
411
412      N = Builder.createArrayType(Layout.getTypeSizeInBits(T),
413                                  Layout.getPrefTypeAlignment(T),
414                                  getOrCreateType(T->getArrayElementType()),
415                                  Builder.getOrCreateArray(Subrange));
416    } else {
417      int encoding = llvm::dwarf::DW_ATE_signed;
418      if (T->isIntegerTy())
419        encoding = llvm::dwarf::DW_ATE_unsigned;
420      else if (T->isFloatingPointTy())
421        encoding = llvm::dwarf::DW_ATE_float;
422
423      N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(),
424                                  0, encoding);
425    }
426    TypeDescriptors[T] = N;
427    return DIDerivedType(N);
428  }
429
430  /// Returns a DebugInfo type that represents a function signature for Func.
431  DICompositeType createFunctionSignature(const Function *Func) {
432    SmallVector<Value *, 4> Params;
433    DIDerivedType ReturnType(getOrCreateType(Func->getReturnType()));
434    Params.push_back(ReturnType);
435
436    const Function::ArgumentListType &Args(Func->getArgumentList());
437    for (Function::ArgumentListType::const_iterator i = Args.begin(),
438                                                    e = Args.end();
439         i != e; ++i) {
440      Type *T(i->getType());
441      Params.push_back(getOrCreateType(T));
442    }
443
444    DIArray ParamArray = Builder.getOrCreateArray(Params);
445    return Builder.createSubroutineType(DIFile(FileNode), ParamArray);
446  }
447
448  /// Associates Instruction I with debug location Loc.
449  void addDebugLocation(Instruction &I, DebugLoc Loc) {
450    MDNode *MD = Loc.getAsMDNode(I.getContext());
451    I.setMetadata(LLVMContext::MD_dbg, MD);
452  }
453};
454
455/// Sets Filename/Directory from the Module identifier and returns true, or
456/// false if source information is not present.
457bool getSourceInfoFromModule(const Module &M, std::string &Directory,
458                             std::string &Filename) {
459  std::string PathStr(M.getModuleIdentifier());
460  if (PathStr.length() == 0 || PathStr == "<stdin>")
461    return false;
462
463  Filename = sys::path::filename(PathStr);
464  SmallVector<char, 16> Path(PathStr.begin(), PathStr.end());
465  sys::path::remove_filename(Path);
466  Directory = StringRef(Path.data(), Path.size());
467  return true;
468}
469
470// Sets Filename/Directory from debug information in M and returns true, or
471// false if no debug information available, or cannot be parsed.
472bool getSourceInfoFromDI(const Module &M, std::string &Directory,
473                         std::string &Filename) {
474  NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
475  if (!CUNode || CUNode->getNumOperands() == 0)
476    return false;
477
478  DICompileUnit CU(CUNode->getOperand(0));
479  if (!CU.Verify())
480    return false;
481
482  Filename = CU.getFilename();
483  Directory = CU.getDirectory();
484  return true;
485}
486
487} // anonymous namespace
488
489namespace llvm {
490
491bool DebugIR::getSourceInfo(const Module &M) {
492  ParsedPath = getSourceInfoFromDI(M, Directory, Filename) ||
493               getSourceInfoFromModule(M, Directory, Filename);
494  return ParsedPath;
495}
496
497bool DebugIR::updateExtension(StringRef NewExtension) {
498  size_t dot = Filename.find_last_of(".");
499  if (dot == std::string::npos)
500    return false;
501
502  Filename.erase(dot);
503  Filename += NewExtension.str();
504  return true;
505}
506
507void DebugIR::generateFilename(OwningPtr<int> &fd) {
508  SmallVector<char, 16> PathVec;
509  fd.reset(new int);
510  sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec);
511  StringRef Path(PathVec.data(), PathVec.size());
512  Filename = sys::path::filename(Path);
513  sys::path::remove_filename(PathVec);
514  Directory = StringRef(PathVec.data(), PathVec.size());
515
516  GeneratedPath = true;
517}
518
519std::string DebugIR::getPath() {
520  SmallVector<char, 16> Path;
521  sys::path::append(Path, Directory, Filename);
522  Path.resize(Filename.size() + Directory.size() + 2);
523  Path[Filename.size() + Directory.size() + 1] = '\0';
524  return std::string(Path.data());
525}
526
527void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
528  OwningPtr<raw_fd_ostream> Out;
529  std::string error;
530
531  if (!fd) {
532    std::string Path = getPath();
533    Out.reset(new raw_fd_ostream(Path.c_str(), error));
534    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
535                 << Path << "\n");
536  } else {
537    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd "
538                 << *fd << "\n");
539    Out.reset(new raw_fd_ostream(*fd, true));
540  }
541
542  M->print(*Out, 0);
543  Out->close();
544}
545
546void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) {
547  if (M.getFunctionList().size() == 0)
548    // no functions -- no debug info needed
549    return;
550
551  OwningPtr<ValueToValueMapTy> VMap;
552
553  if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) {
554    VMap.reset(new ValueToValueMapTy);
555    DisplayM.reset(CloneModule(&M, *VMap));
556
557    if (HideDebugIntrinsics)
558      DebugIntrinsicsRemover::process(*DisplayM);
559
560    if (HideDebugMetadata)
561      DebugMetadataRemover::process(*DisplayM);
562  }
563
564  DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get());
565}
566
567bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); }
568
569bool DebugIR::runOnModule(Module &M) {
570  OwningPtr<int> fd;
571
572  if (isMissingPath() && !getSourceInfo(M)) {
573    if (!WriteSourceToDisk)
574      report_fatal_error("DebugIR unable to determine file name in input. "
575                         "Ensure Module contains an identifier, a valid "
576                         "DICompileUnit, or construct DebugIR with "
577                         "non-empty Filename/Directory parameters.");
578    else
579      generateFilename(fd);
580  }
581
582  if (!GeneratedPath && WriteSourceToDisk)
583    updateExtension(".debug-ll");
584
585  // Clear line numbers. Keep debug info (if any) if we were able to read the
586  // file name from the DICompileUnit descriptor.
587  DebugMetadataRemover::process(M, !ParsedPath);
588
589  OwningPtr<Module> DisplayM;
590  createDebugInfo(M, DisplayM);
591  if (WriteSourceToDisk) {
592    Module *OutputM = DisplayM.get() ? DisplayM.get() : &M;
593    writeDebugBitcode(OutputM, fd.get());
594  }
595
596  DEBUG(M.dump());
597  return true;
598}
599
600bool DebugIR::runOnModule(Module &M, std::string &Path) {
601  bool result = runOnModule(M);
602  Path = getPath();
603  return result;
604}
605
606} // llvm namespace
607
608char DebugIR::ID = 0;
609INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false)
610
611ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics,
612                                    bool HideDebugMetadata, StringRef Directory,
613                                    StringRef Filename) {
614  return new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory,
615                     Filename);
616}
617
618ModulePass *llvm::createDebugIRPass() { return new DebugIR(); }
619