SymbolizableObjectFile.cpp revision 360784
1//===- SymbolizableObjectFile.cpp -----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of SymbolizableObjectFile class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SymbolizableObjectFile.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/BinaryFormat/COFF.h"
18#include "llvm/DebugInfo/DWARF/DWARFContext.h"
19#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Object/ObjectFile.h"
22#include "llvm/Object/SymbolSize.h"
23#include "llvm/Support/Casting.h"
24#include "llvm/Support/DataExtractor.h"
25#include "llvm/Support/Error.h"
26#include <algorithm>
27#include <cstdint>
28#include <memory>
29#include <string>
30#include <system_error>
31#include <utility>
32#include <vector>
33
34using namespace llvm;
35using namespace object;
36using namespace symbolize;
37
38static DILineInfoSpecifier
39getDILineInfoSpecifier(FunctionNameKind FNKind) {
40  return DILineInfoSpecifier(
41      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
42}
43
44ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
45SymbolizableObjectFile::create(const object::ObjectFile *Obj,
46                               std::unique_ptr<DIContext> DICtx,
47                               bool UntagAddresses) {
48  assert(DICtx);
49  std::unique_ptr<SymbolizableObjectFile> res(
50      new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses));
51  std::unique_ptr<DataExtractor> OpdExtractor;
52  uint64_t OpdAddress = 0;
53  // Find the .opd (function descriptor) section if any, for big-endian
54  // PowerPC64 ELF.
55  if (Obj->getArch() == Triple::ppc64) {
56    for (section_iterator Section : Obj->sections()) {
57      Expected<StringRef> NameOrErr = Section->getName();
58      if (!NameOrErr)
59        return errorToErrorCode(NameOrErr.takeError());
60
61      if (*NameOrErr == ".opd") {
62        Expected<StringRef> E = Section->getContents();
63        if (!E)
64          return errorToErrorCode(E.takeError());
65        OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
66                                             Obj->getBytesInAddress()));
67        OpdAddress = Section->getAddress();
68        break;
69      }
70    }
71  }
72  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
73      computeSymbolSizes(*Obj);
74  for (auto &P : Symbols)
75    res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
76
77  // If this is a COFF object and we didn't find any symbols, try the export
78  // table.
79  if (Symbols.empty()) {
80    if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
81      if (auto EC = res->addCoffExportSymbols(CoffObj))
82        return EC;
83  }
84
85  std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
86                                                &Os = res->Objects;
87  auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
88    // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
89    // pick the one with the largest Size. This helps us avoid symbols with no
90    // size information (Size=0).
91    llvm::sort(S);
92    auto I = S.begin(), E = S.end(), J = S.begin();
93    while (I != E) {
94      auto OI = I;
95      while (++I != E && OI->first.Addr == I->first.Addr) {
96      }
97      *J++ = I[-1];
98    }
99    S.erase(J, S.end());
100  };
101  Uniquify(Fs);
102  Uniquify(Os);
103
104  return std::move(res);
105}
106
107SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
108                                               std::unique_ptr<DIContext> DICtx,
109                                               bool UntagAddresses)
110    : Module(Obj), DebugInfoContext(std::move(DICtx)),
111      UntagAddresses(UntagAddresses) {}
112
113namespace {
114
115struct OffsetNamePair {
116  uint32_t Offset;
117  StringRef Name;
118
119  bool operator<(const OffsetNamePair &R) const {
120    return Offset < R.Offset;
121  }
122};
123
124} // end anonymous namespace
125
126std::error_code SymbolizableObjectFile::addCoffExportSymbols(
127    const COFFObjectFile *CoffObj) {
128  // Get all export names and offsets.
129  std::vector<OffsetNamePair> ExportSyms;
130  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
131    StringRef Name;
132    uint32_t Offset;
133    if (auto EC = Ref.getSymbolName(Name))
134      return EC;
135    if (auto EC = Ref.getExportRVA(Offset))
136      return EC;
137    ExportSyms.push_back(OffsetNamePair{Offset, Name});
138  }
139  if (ExportSyms.empty())
140    return std::error_code();
141
142  // Sort by ascending offset.
143  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
144
145  // Approximate the symbol sizes by assuming they run to the next symbol.
146  // FIXME: This assumes all exports are functions.
147  uint64_t ImageBase = CoffObj->getImageBase();
148  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
149    OffsetNamePair &Export = *I;
150    // FIXME: The last export has a one byte size now.
151    uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
152    uint64_t SymbolStart = ImageBase + Export.Offset;
153    uint64_t SymbolSize = NextOffset - Export.Offset;
154    SymbolDesc SD = {SymbolStart, SymbolSize};
155    Functions.emplace_back(SD, Export.Name);
156  }
157  return std::error_code();
158}
159
160std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
161                                                  uint64_t SymbolSize,
162                                                  DataExtractor *OpdExtractor,
163                                                  uint64_t OpdAddress) {
164  // Avoid adding symbols from an unknown/undefined section.
165  const ObjectFile *Obj = Symbol.getObject();
166  Expected<section_iterator> Sec = Symbol.getSection();
167  if (!Sec || (Obj && Obj->section_end() == *Sec))
168    return std::error_code();
169  Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
170  if (!SymbolTypeOrErr)
171    return errorToErrorCode(SymbolTypeOrErr.takeError());
172  SymbolRef::Type SymbolType = *SymbolTypeOrErr;
173  if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
174    return std::error_code();
175  Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
176  if (!SymbolAddressOrErr)
177    return errorToErrorCode(SymbolAddressOrErr.takeError());
178  uint64_t SymbolAddress = *SymbolAddressOrErr;
179  if (UntagAddresses) {
180    // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
181    // into bits 56-63 instead of masking them out.
182    SymbolAddress &= (1ull << 56) - 1;
183    SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8;
184  }
185  if (OpdExtractor) {
186    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
187    // function descriptors. The first word of the descriptor is a pointer to
188    // the function's code.
189    // For the purposes of symbolization, pretend the symbol's address is that
190    // of the function's code, not the descriptor.
191    uint64_t OpdOffset = SymbolAddress - OpdAddress;
192    if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
193      SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
194  }
195  Expected<StringRef> SymbolNameOrErr = Symbol.getName();
196  if (!SymbolNameOrErr)
197    return errorToErrorCode(SymbolNameOrErr.takeError());
198  StringRef SymbolName = *SymbolNameOrErr;
199  // Mach-O symbol table names have leading underscore, skip it.
200  if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
201    SymbolName = SymbolName.drop_front();
202  // FIXME: If a function has alias, there are two entries in symbol table
203  // with same address size. Make sure we choose the correct one.
204  auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
205  SymbolDesc SD = { SymbolAddress, SymbolSize };
206  M.emplace_back(SD, SymbolName);
207  return std::error_code();
208}
209
210// Return true if this is a 32-bit x86 PE COFF module.
211bool SymbolizableObjectFile::isWin32Module() const {
212  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
213  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
214}
215
216uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
217  if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
218    return CoffObject->getImageBase();
219  return 0;
220}
221
222bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
223                                                    uint64_t Address,
224                                                    std::string &Name,
225                                                    uint64_t &Addr,
226                                                    uint64_t &Size) const {
227  const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
228  std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
229  auto SymbolIterator = llvm::upper_bound(Symbols, SD);
230  if (SymbolIterator == Symbols.begin())
231    return false;
232  --SymbolIterator;
233  if (SymbolIterator->first.Size != 0 &&
234      SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
235    return false;
236  Name = SymbolIterator->second.str();
237  Addr = SymbolIterator->first.Addr;
238  Size = SymbolIterator->first.Size;
239  return true;
240}
241
242bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
243    FunctionNameKind FNKind, bool UseSymbolTable) const {
244  // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
245  // better answers for linkage names than the DIContext. Otherwise, we are
246  // probably using PEs and PDBs, and we shouldn't do the override. PE files
247  // generally only contain the names of exported symbols.
248  return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
249         isa<DWARFContext>(DebugInfoContext.get());
250}
251
252DILineInfo
253SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
254                                      FunctionNameKind FNKind,
255                                      bool UseSymbolTable) const {
256  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
257    ModuleOffset.SectionIndex =
258        getModuleSectionIndexForAddress(ModuleOffset.Address);
259  DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress(
260      ModuleOffset, getDILineInfoSpecifier(FNKind));
261
262  // Override function name from symbol table if necessary.
263  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
264    std::string FunctionName;
265    uint64_t Start, Size;
266    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
267                               FunctionName, Start, Size)) {
268      LineInfo.FunctionName = FunctionName;
269    }
270  }
271  return LineInfo;
272}
273
274DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
275    object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
276    bool UseSymbolTable) const {
277  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
278    ModuleOffset.SectionIndex =
279        getModuleSectionIndexForAddress(ModuleOffset.Address);
280  DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
281      ModuleOffset, getDILineInfoSpecifier(FNKind));
282
283  // Make sure there is at least one frame in context.
284  if (InlinedContext.getNumberOfFrames() == 0)
285    InlinedContext.addFrame(DILineInfo());
286
287  // Override the function name in lower frame with name from symbol table.
288  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
289    std::string FunctionName;
290    uint64_t Start, Size;
291    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
292                               FunctionName, Start, Size)) {
293      InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
294          ->FunctionName = FunctionName;
295    }
296  }
297
298  return InlinedContext;
299}
300
301DIGlobal SymbolizableObjectFile::symbolizeData(
302    object::SectionedAddress ModuleOffset) const {
303  DIGlobal Res;
304  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
305                         Res.Start, Res.Size);
306  return Res;
307}
308
309std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
310    object::SectionedAddress ModuleOffset) const {
311  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
312    ModuleOffset.SectionIndex =
313        getModuleSectionIndexForAddress(ModuleOffset.Address);
314  return DebugInfoContext->getLocalsForAddress(ModuleOffset);
315}
316
317/// Search for the first occurence of specified Address in ObjectFile.
318uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
319    uint64_t Address) const {
320
321  for (SectionRef Sec : Module->sections()) {
322    if (!Sec.isText() || Sec.isVirtual())
323      continue;
324
325    if (Address >= Sec.getAddress() &&
326        Address < Sec.getAddress() + Sec.getSize())
327      return Sec.getIndex();
328  }
329
330  return object::SectionedAddress::UndefSection;
331}
332