DWARFDebugLine.h revision 360784
//===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
10#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
11
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include <cstdint>
#include <map>
#include <string>
#include <tuple>
#include <vector>
26
27namespace llvm {
28
29class DWARFUnit;
30class raw_ostream;
31
32class DWARFDebugLine {
33public:
34  struct FileNameEntry {
35    FileNameEntry() = default;
36
37    DWARFFormValue Name;
38    uint64_t DirIdx = 0;
39    uint64_t ModTime = 0;
40    uint64_t Length = 0;
41    MD5::MD5Result Checksum;
42    DWARFFormValue Source;
43  };
44
45  /// Tracks which optional content types are present in a DWARF file name
46  /// entry format.
47  struct ContentTypeTracker {
48    ContentTypeTracker() = default;
49
50    /// Whether filename entries provide a modification timestamp.
51    bool HasModTime = false;
52    /// Whether filename entries provide a file size.
53    bool HasLength = false;
54    /// For v5, whether filename entries provide an MD5 checksum.
55    bool HasMD5 = false;
56    /// For v5, whether filename entries provide source text.
57    bool HasSource = false;
58
59    /// Update tracked content types with \p ContentType.
60    void trackContentType(dwarf::LineNumberEntryFormat ContentType);
61  };
62
63  struct Prologue {
64    Prologue();
65
66    /// The size in bytes of the statement information for this compilation unit
67    /// (not including the total_length field itself).
68    uint64_t TotalLength;
69    /// Version, address size (starting in v5), and DWARF32/64 format; these
70    /// parameters affect interpretation of forms (used in the directory and
71    /// file tables starting with v5).
72    dwarf::FormParams FormParams;
73    /// The number of bytes following the prologue_length field to the beginning
74    /// of the first byte of the statement program itself.
75    uint64_t PrologueLength;
76    /// In v5, size in bytes of a segment selector.
77    uint8_t SegSelectorSize;
78    /// The size in bytes of the smallest target machine instruction. Statement
79    /// program opcodes that alter the address register first multiply their
80    /// operands by this value.
81    uint8_t MinInstLength;
82    /// The maximum number of individual operations that may be encoded in an
83    /// instruction.
84    uint8_t MaxOpsPerInst;
85    /// The initial value of theis_stmtregister.
86    uint8_t DefaultIsStmt;
87    /// This parameter affects the meaning of the special opcodes. See below.
88    int8_t LineBase;
89    /// This parameter affects the meaning of the special opcodes. See below.
90    uint8_t LineRange;
91    /// The number assigned to the first special opcode.
92    uint8_t OpcodeBase;
93    /// This tracks which optional file format content types are present.
94    ContentTypeTracker ContentTypes;
95    std::vector<uint8_t> StandardOpcodeLengths;
96    std::vector<DWARFFormValue> IncludeDirectories;
97    std::vector<FileNameEntry> FileNames;
98
99    const dwarf::FormParams getFormParams() const { return FormParams; }
100    uint16_t getVersion() const { return FormParams.Version; }
101    uint8_t getAddressSize() const { return FormParams.AddrSize; }
102    bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; }
103
104    uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; }
105
106    uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; }
107
108    bool totalLengthIsValid() const;
109
110    /// Length of the prologue in bytes.
111    uint32_t getLength() const {
112      return PrologueLength + sizeofTotalLength() + sizeof(getVersion()) +
113             sizeofPrologueLength();
114    }
115
116    /// Length of the line table data in bytes (not including the prologue).
117    uint32_t getStatementTableLength() const {
118      return TotalLength + sizeofTotalLength() - getLength();
119    }
120
121    int32_t getMaxLineIncrementForSpecialOpcode() const {
122      return LineBase + (int8_t)LineRange - 1;
123    }
124
125    /// Get DWARF-version aware access to the file name entry at the provided
126    /// index.
127    const llvm::DWARFDebugLine::FileNameEntry &
128    getFileNameEntry(uint64_t Index) const;
129
130    bool hasFileAtIndex(uint64_t FileIndex) const;
131
132    bool
133    getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
134                       DILineInfoSpecifier::FileLineInfoKind Kind,
135                       std::string &Result,
136                       sys::path::Style Style = sys::path::Style::native) const;
137
138    void clear();
139    void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
140    Error parse(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
141                const DWARFContext &Ctx, const DWARFUnit *U = nullptr);
142  };
143
144  /// Standard .debug_line state machine structure.
145  struct Row {
146    explicit Row(bool DefaultIsStmt = false);
147
148    /// Called after a row is appended to the matrix.
149    void postAppend();
150    void reset(bool DefaultIsStmt);
151    void dump(raw_ostream &OS) const;
152
153    static void dumpTableHeader(raw_ostream &OS);
154
155    static bool orderByAddress(const Row &LHS, const Row &RHS) {
156      return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
157             std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
158    }
159
160    /// The program-counter value corresponding to a machine instruction
161    /// generated by the compiler and section index pointing to the section
162    /// containg this PC. If relocation information is present then section
163    /// index is the index of the section which contains above address.
164    /// Otherwise this is object::SectionedAddress::Undef value.
165    object::SectionedAddress Address;
166    /// An unsigned integer indicating a source line number. Lines are numbered
167    /// beginning at 1. The compiler may emit the value 0 in cases where an
168    /// instruction cannot be attributed to any source line.
169    uint32_t Line;
170    /// An unsigned integer indicating a column number within a source line.
171    /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
172    /// that a statement begins at the 'left edge' of the line.
173    uint16_t Column;
174    /// An unsigned integer indicating the identity of the source file
175    /// corresponding to a machine instruction.
176    uint16_t File;
177    /// An unsigned integer representing the DWARF path discriminator value
178    /// for this location.
179    uint32_t Discriminator;
180    /// An unsigned integer whose value encodes the applicable instruction set
181    /// architecture for the current instruction.
182    uint8_t Isa;
183    /// A boolean indicating that the current instruction is the beginning of a
184    /// statement.
185    uint8_t IsStmt : 1,
186        /// A boolean indicating that the current instruction is the
187        /// beginning of a basic block.
188        BasicBlock : 1,
189        /// A boolean indicating that the current address is that of the
190        /// first byte after the end of a sequence of target machine
191        /// instructions.
192        EndSequence : 1,
193        /// A boolean indicating that the current address is one (of possibly
194        /// many) where execution should be suspended for an entry breakpoint
195        /// of a function.
196        PrologueEnd : 1,
197        /// A boolean indicating that the current address is one (of possibly
198        /// many) where execution should be suspended for an exit breakpoint
199        /// of a function.
200        EpilogueBegin : 1;
201  };
202
203  /// Represents a series of contiguous machine instructions. Line table for
204  /// each compilation unit may consist of multiple sequences, which are not
205  /// guaranteed to be in the order of ascending instruction address.
206  struct Sequence {
207    Sequence();
208
209    /// Sequence describes instructions at address range [LowPC, HighPC)
210    /// and is described by line table rows [FirstRowIndex, LastRowIndex).
211    uint64_t LowPC;
212    uint64_t HighPC;
213    /// If relocation information is present then this is the index of the
214    /// section which contains above addresses. Otherwise this is
215    /// object::SectionedAddress::Undef value.
216    uint64_t SectionIndex;
217    unsigned FirstRowIndex;
218    unsigned LastRowIndex;
219    bool Empty;
220
221    void reset();
222
223    static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
224      return std::tie(LHS.SectionIndex, LHS.HighPC) <
225             std::tie(RHS.SectionIndex, RHS.HighPC);
226    }
227
228    bool isValid() const {
229      return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
230    }
231
232    bool containsPC(object::SectionedAddress PC) const {
233      return SectionIndex == PC.SectionIndex &&
234             (LowPC <= PC.Address && PC.Address < HighPC);
235    }
236  };
237
238  struct LineTable {
239    LineTable();
240
241    /// Represents an invalid row
242    const uint32_t UnknownRowIndex = UINT32_MAX;
243
244    void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
245
246    void appendSequence(const DWARFDebugLine::Sequence &S) {
247      Sequences.push_back(S);
248    }
249
250    /// Returns the index of the row with file/line info for a given address,
251    /// or UnknownRowIndex if there is no such row.
252    uint32_t lookupAddress(object::SectionedAddress Address) const;
253
254    bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
255                            std::vector<uint32_t> &Result) const;
256
257    bool hasFileAtIndex(uint64_t FileIndex) const {
258      return Prologue.hasFileAtIndex(FileIndex);
259    }
260
261    /// Extracts filename by its index in filename table in prologue.
262    /// In Dwarf 4, the files are 1-indexed and the current compilation file
263    /// name is not represented in the list. In DWARF v5, the files are
264    /// 0-indexed and the primary source file has the index 0.
265    /// Returns true on success.
266    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
267                            DILineInfoSpecifier::FileLineInfoKind Kind,
268                            std::string &Result) const {
269      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
270    }
271
272    /// Fills the Result argument with the file and line information
273    /// corresponding to Address. Returns true on success.
274    bool getFileLineInfoForAddress(object::SectionedAddress Address,
275                                   const char *CompDir,
276                                   DILineInfoSpecifier::FileLineInfoKind Kind,
277                                   DILineInfo &Result) const;
278
279    void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
280    void clear();
281
282    /// Parse prologue and all rows.
283    Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr,
284                const DWARFContext &Ctx, const DWARFUnit *U,
285                function_ref<void(Error)> RecoverableErrorCallback,
286                raw_ostream *OS = nullptr);
287
288    using RowVector = std::vector<Row>;
289    using RowIter = RowVector::const_iterator;
290    using SequenceVector = std::vector<Sequence>;
291    using SequenceIter = SequenceVector::const_iterator;
292
293    struct Prologue Prologue;
294    RowVector Rows;
295    SequenceVector Sequences;
296
297  private:
298    uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
299                          object::SectionedAddress Address) const;
300    Optional<StringRef>
301    getSourceByIndex(uint64_t FileIndex,
302                     DILineInfoSpecifier::FileLineInfoKind Kind) const;
303
304    uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
305
306    bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
307                                std::vector<uint32_t> &Result) const;
308  };
309
310  const LineTable *getLineTable(uint64_t Offset) const;
311  Expected<const LineTable *>
312  getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
313                      const DWARFContext &Ctx, const DWARFUnit *U,
314                      function_ref<void(Error)> RecoverableErrorCallback);
315
316  /// Helper to allow for parsing of an entire .debug_line section in sequence.
317  class SectionParser {
318  public:
319    using cu_range = DWARFUnitVector::iterator_range;
320    using tu_range = DWARFUnitVector::iterator_range;
321    using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
322
323    SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs,
324                  tu_range TUs);
325
326    /// Get the next line table from the section. Report any issues via the
327    /// callbacks.
328    ///
329    /// \param RecoverableErrorCallback - any issues that don't prevent further
330    /// parsing of the table will be reported through this callback.
331    /// \param UnrecoverableErrorCallback - any issues that prevent further
332    /// parsing of the table will be reported through this callback.
333    /// \param OS - if not null, the parser will print information about the
334    /// table as it parses it.
335    LineTable
336    parseNext(
337        function_ref<void(Error)> RecoverableErrorCallback,
338        function_ref<void(Error)> UnrecoverableErrorCallback,
339        raw_ostream *OS = nullptr);
340
341    /// Skip the current line table and go to the following line table (if
342    /// present) immediately.
343    ///
344    /// \param ErrorCallback - report any prologue parsing issues via this
345    /// callback.
346    void skip(function_ref<void(Error)> ErrorCallback);
347
348    /// Indicates if the parser has parsed as much as possible.
349    ///
350    /// \note Certain problems with the line table structure might mean that
351    /// parsing stops before the end of the section is reached.
352    bool done() const { return Done; }
353
354    /// Get the offset the parser has reached.
355    uint64_t getOffset() const { return Offset; }
356
357  private:
358    DWARFUnit *prepareToParse(uint64_t Offset);
359    void moveToNextTable(uint64_t OldOffset, const Prologue &P);
360
361    LineToUnitMap LineToUnit;
362
363    DWARFDataExtractor &DebugLineData;
364    const DWARFContext &Context;
365    uint64_t Offset = 0;
366    bool Done = false;
367  };
368
369private:
370  struct ParsingState {
371    ParsingState(struct LineTable *LT);
372
373    void resetRowAndSequence();
374    void appendRowToMatrix();
375
376    /// Line table we're currently parsing.
377    struct LineTable *LineTable;
378    struct Row Row;
379    struct Sequence Sequence;
380  };
381
382  using LineTableMapTy = std::map<uint64_t, LineTable>;
383  using LineTableIter = LineTableMapTy::iterator;
384  using LineTableConstIter = LineTableMapTy::const_iterator;
385
386  LineTableMapTy LineTableMap;
387};
388
389} // end namespace llvm
390
391#endif // LLVM_DEBUGINFO_DWARFDEBUGLINE_H
392