GsymReader.h revision 360784
1//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11
12
13#include "llvm/ADT/ArrayRef.h"
14#include "llvm/DebugInfo/GSYM/FileEntry.h"
15#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
16#include "llvm/DebugInfo/GSYM/Header.h"
17#include "llvm/DebugInfo/GSYM/LineEntry.h"
18#include "llvm/DebugInfo/GSYM/StringTable.h"
19#include "llvm/Support/DataExtractor.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/ErrorOr.h"
22
23#include <inttypes.h>
24#include <memory>
25#include <stdint.h>
26#include <string>
27#include <vector>
28
29namespace llvm {
30class MemoryBuffer;
31class raw_ostream;
32
33namespace gsym {
34
35/// GsymReader is used to read GSYM data from a file or buffer.
36///
37/// This class is optimized for very quick lookups when the endianness matches
38/// the host system. The Header, address table, address info offsets, and file
39/// table is designed to be mmap'ed as read only into memory and used without
40/// any parsing needed. If the endianness doesn't match, we swap these objects
41/// and tables into GsymReader::SwappedData and then point our header and
42/// ArrayRefs to this swapped internal data.
43///
44/// GsymReader objects must use one of the static functions to create an
45/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46
47class GsymReader {
48  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
49  llvm::Error parse();
50
51  std::unique_ptr<MemoryBuffer> MemBuffer;
52  StringRef GsymBytes;
53  llvm::support::endianness Endian;
54  const Header *Hdr = nullptr;
55  ArrayRef<uint8_t> AddrOffsets;
56  ArrayRef<uint32_t> AddrInfoOffsets;
57  ArrayRef<FileEntry> Files;
58  StringTable StrTab;
59  /// When the GSYM file's endianness doesn't match the host system then
60  /// we must decode all data structures that need to be swapped into
61  /// local storage and set point the ArrayRef objects above to these swapped
62  /// copies.
63  struct SwappedData {
64    Header Hdr;
65    std::vector<uint8_t> AddrOffsets;
66    std::vector<uint32_t> AddrInfoOffsets;
67    std::vector<FileEntry> Files;
68  };
69  std::unique_ptr<SwappedData> Swap;
70
71public:
72  GsymReader(GsymReader &&RHS);
73  ~GsymReader();
74
75  /// Construct a GsymReader from a file on disk.
76  ///
77  /// \param Path The file path the GSYM file to read.
78  /// \returns An expected GsymReader that contains the object or an error
79  /// object that indicates reason for failing to read the GSYM.
80  static llvm::Expected<GsymReader> openFile(StringRef Path);
81
82  /// Construct a GsymReader from a buffer.
83  ///
84  /// \param Bytes A set of bytes that will be copied and owned by the
85  /// returned object on success.
86  /// \returns An expected GsymReader that contains the object or an error
87  /// object that indicates reason for failing to read the GSYM.
88  static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
89
90  /// Access the GSYM header.
91  /// \returns A native endian version of the GSYM header.
92  const Header &getHeader() const;
93
94  /// Get the full function info for an address.
95  ///
96  /// This should be called when a client will store a copy of the complete
97  /// FunctionInfo for a given address. For one off lookups, use the lookup()
98  /// function below.
99  ///
100  /// Symbolication server processes might want to parse the entire function
101  /// info for a given address and cache it if the process stays around to
102  /// service many symbolication addresses, like for parsing profiling
103  /// information.
104  ///
105  /// \param Addr A virtual address from the orignal object file to lookup.
106  ///
107  /// \returns An expected FunctionInfo that contains the function info object
108  /// or an error object that indicates reason for failing to lookup the
109  /// address.
110  llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
111
112  /// Lookup an address in the a GSYM.
113  ///
114  /// Lookup just the information needed for a specific address \a Addr. This
115  /// function is faster that calling getFunctionInfo() as it will only return
116  /// information that pertains to \a Addr and allows the parsing to skip any
117  /// extra information encoded for other addresses. For example the line table
118  /// parsing can stop when a matching LineEntry has been fouhnd, and the
119  /// InlineInfo can stop parsing early once a match has been found and also
120  /// skip information that doesn't match. This avoids memory allocations and
121  /// is much faster for lookups.
122  ///
123  /// \param Addr A virtual address from the orignal object file to lookup.
124  /// \returns An expected LookupResult that contains only the information
125  /// needed for the current address, or an error object that indicates reason
126  /// for failing to lookup the address.
127  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
128
129  /// Get a string from the string table.
130  ///
131  /// \param Offset The string table offset for the string to retrieve.
132  /// \returns The string from the strin table.
133  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
134
135  /// Get the a file entry for the suppplied file index.
136  ///
137  /// Used to convert any file indexes in the FunctionInfo data back into
138  /// files. This function can be used for iteration, but is more commonly used
139  /// for random access when doing lookups.
140  ///
141  /// \param Index An index into the file table.
142  /// \returns An optional FileInfo that will be valid if the file index is
143  /// valid, or llvm::None if the file index is out of bounds,
144  Optional<FileEntry> getFile(uint32_t Index) const {
145    if (Index < Files.size())
146      return Files[Index];
147    return llvm::None;
148  }
149
150protected:
151  /// Gets an address from the address table.
152  ///
153  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
154  ///
155  /// \param Index A index into the address table.
156  /// \returns A resolved virtual address for adddress in the address table
157  /// or llvm::None if Index is out of bounds.
158  Optional<uint64_t> getAddress(size_t Index) const;
159
160  /// Get an appropriate address info offsets array.
161  ///
162  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
163  /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
164  /// internally as a array of bytes that are in the correct endianness. When
165  /// we access this table we must get an array that matches those sizes. This
166  /// templatized helper function is used when accessing address offsets in the
167  /// AddrOffsets member variable.
168  ///
169  /// \returns An ArrayRef of an appropriate address offset size.
170  template <class T> ArrayRef<T>
171  getAddrOffsets() const {
172    return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
173                       AddrOffsets.size()/sizeof(T));
174  }
175
176  /// Get an appropriate address from the address table.
177  ///
178  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
179  /// byte address offsets from the The gsym::Header::BaseAddress. The table is
180  /// stored internally as a array of bytes that are in the correct endianness.
181  /// In order to extract an address from the address table we must access the
182  /// address offset using the correct size and then add it to the BaseAddress
183  /// in the header.
184  ///
185  /// \param Index An index into the AddrOffsets array.
186  /// \returns An virtual address that matches the original object file for the
187  /// address as the specified index, or llvm::None if Index is out of bounds.
188  template <class T> Optional<uint64_t>
189  addressForIndex(size_t Index) const {
190    ArrayRef<T> AIO = getAddrOffsets<T>();
191    if (Index < AIO.size())
192      return AIO[Index] + Hdr->BaseAddress;
193    return llvm::None;
194  }
195  /// Lookup an address offset in the AddrOffsets table.
196  ///
197  /// Given an address offset, look it up using a binary search of the
198  /// AddrOffsets table.
199  ///
200  /// \param AddrOffset An address offset, that has already been computed by
201  /// subtracting the gsym::Header::BaseAddress.
202  /// \returns The matching address offset index. This index will be used to
203  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
204  template <class T>
205  uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const {
206    ArrayRef<T> AIO = getAddrOffsets<T>();
207    const auto Begin = AIO.begin();
208    const auto End = AIO.end();
209    auto Iter = std::lower_bound(Begin, End, AddrOffset);
210    if (Iter == End || AddrOffset < *Iter)
211      --Iter;
212    return std::distance(Begin, Iter);
213  }
214
215  /// Create a GSYM from a memory buffer.
216  ///
217  /// Called by both openFile() and copyBuffer(), this function does all of the
218  /// work of parsing the GSYM file and returning an error.
219  ///
220  /// \param MemBuffer A memory buffer that will transfer ownership into the
221  /// GsymReader.
222  /// \returns An expected GsymReader that contains the object or an error
223  /// object that indicates reason for failing to read the GSYM.
224  static llvm::Expected<llvm::gsym::GsymReader>
225  create(std::unique_ptr<MemoryBuffer> &MemBuffer);
226
227
228  /// Given an address, find the address index.
229  ///
230  /// Binary search the address table and find the matching address index.
231  ///
232  /// \param Addr A virtual address that matches the original object file
233  /// to lookup.
234  /// \returns An index into the address table. This index can be used to
235  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
236  /// Returns an error if the address isn't in the GSYM with details of why.
237  Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
238
239  /// Given an address index, get the offset for the FunctionInfo.
240  ///
241  /// Looking up an address is done by finding the corresponding address
242  /// index for the address. This index is then used to get the offset of the
243  /// FunctionInfo data that we will decode using this function.
244  ///
245  /// \param Index An index into the address table.
246  /// \returns An optional GSYM data offset for the offset of the FunctionInfo
247  /// that needs to be decoded.
248  Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
249};
250
251} // namespace gsym
252} // namespace llvm
253
254#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
255