PDBFile.cpp revision 360784
1//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/DebugInfo/MSF/MSFCommon.h"
13#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20#include "llvm/DebugInfo/PDB/Native/RawError.h"
21#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23#include "llvm/Support/BinaryStream.h"
24#include "llvm/Support/BinaryStreamArray.h"
25#include "llvm/Support/BinaryStreamReader.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Error.h"
28#include "llvm/Support/Path.h"
29#include <algorithm>
30#include <cassert>
31#include <cstdint>
32
33using namespace llvm;
34using namespace llvm::codeview;
35using namespace llvm::msf;
36using namespace llvm::pdb;
37
38namespace {
39typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40} // end anonymous namespace
41
42PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43                 BumpPtrAllocator &Allocator)
44    : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45
46PDBFile::~PDBFile() = default;
47
48StringRef PDBFile::getFilePath() const { return FilePath; }
49
50StringRef PDBFile::getFileDirectory() const {
51  return sys::path::parent_path(FilePath);
52}
53
54uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55
56uint32_t PDBFile::getFreeBlockMapBlock() const {
57  return ContainerLayout.SB->FreeBlockMapBlock;
58}
59
60uint32_t PDBFile::getBlockCount() const {
61  return ContainerLayout.SB->NumBlocks;
62}
63
64uint32_t PDBFile::getNumDirectoryBytes() const {
65  return ContainerLayout.SB->NumDirectoryBytes;
66}
67
68uint32_t PDBFile::getBlockMapIndex() const {
69  return ContainerLayout.SB->BlockMapAddr;
70}
71
72uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73
74uint32_t PDBFile::getNumDirectoryBlocks() const {
75  return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76                            ContainerLayout.SB->BlockSize);
77}
78
79uint64_t PDBFile::getBlockMapOffset() const {
80  return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81         ContainerLayout.SB->BlockSize;
82}
83
84uint32_t PDBFile::getNumStreams() const {
85  return ContainerLayout.StreamSizes.size();
86}
87
88uint32_t PDBFile::getMaxStreamSize() const {
89  return *std::max_element(ContainerLayout.StreamSizes.begin(),
90                           ContainerLayout.StreamSizes.end());
91}
92
93uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
94  return ContainerLayout.StreamSizes[StreamIndex];
95}
96
97ArrayRef<support::ulittle32_t>
98PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
99  return ContainerLayout.StreamMap[StreamIndex];
100}
101
102uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103
104Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
105                                                  uint32_t NumBytes) const {
106  uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107
108  ArrayRef<uint8_t> Result;
109  if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110    return std::move(EC);
111  return Result;
112}
113
114Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
115                            ArrayRef<uint8_t> Data) const {
116  return make_error<RawError>(raw_error_code::not_writable,
117                              "PDBFile is immutable");
118}
119
120Error PDBFile::parseFileHeaders() {
121  BinaryStreamReader Reader(*Buffer);
122
123  // Initialize SB.
124  const msf::SuperBlock *SB = nullptr;
125  if (auto EC = Reader.readObject(SB)) {
126    consumeError(std::move(EC));
127    return make_error<RawError>(raw_error_code::corrupt_file,
128                                "MSF superblock is missing");
129  }
130
131  if (auto EC = msf::validateSuperBlock(*SB))
132    return EC;
133
134  if (Buffer->getLength() % SB->BlockSize != 0)
135    return make_error<RawError>(raw_error_code::corrupt_file,
136                                "File size is not a multiple of block size");
137  ContainerLayout.SB = SB;
138
139  // Initialize Free Page Map.
140  ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141  // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
142  // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143  // thusly an equal number of total blocks in the file.  For a block size
144  // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145  // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
146  // the Fpm is split across the file at `getBlockSize()` intervals.  As a
147  // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148  // for any non-negative integer k is an Fpm block.  In theory, we only really
149  // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150  // current versions of the MSF format already expect the Fpm to be arranged
151  // at getBlockSize() intervals, so we have to be compatible.
152  // See the function fpmPn() for more information:
153  // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154  auto FpmStream =
155      MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156  BinaryStreamReader FpmReader(*FpmStream);
157  ArrayRef<uint8_t> FpmBytes;
158  if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159    return EC;
160  uint32_t BlocksRemaining = getBlockCount();
161  uint32_t BI = 0;
162  for (auto Byte : FpmBytes) {
163    uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164    for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165      if (Byte & (1 << I))
166        ContainerLayout.FreePageMap[BI] = true;
167      --BlocksRemaining;
168      ++BI;
169    }
170  }
171
172  Reader.setOffset(getBlockMapOffset());
173  if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174                                 getNumDirectoryBlocks()))
175    return EC;
176
177  return Error::success();
178}
179
180Error PDBFile::parseStreamData() {
181  assert(ContainerLayout.SB);
182  if (DirectoryStream)
183    return Error::success();
184
185  uint32_t NumStreams = 0;
186
187  // Normally you can't use a MappedBlockStream without having fully parsed the
188  // PDB file, because it accesses the directory and various other things, which
189  // is exactly what we are attempting to parse.  By specifying a custom
190  // subclass of IPDBStreamData which only accesses the fields that have already
191  // been parsed, we can avoid this and reuse MappedBlockStream.
192  auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193                                                     Allocator);
194  BinaryStreamReader Reader(*DS);
195  if (auto EC = Reader.readInteger(NumStreams))
196    return EC;
197
198  if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199    return EC;
200  for (uint32_t I = 0; I < NumStreams; ++I) {
201    uint32_t StreamSize = getStreamByteSize(I);
202    // FIXME: What does StreamSize ~0U mean?
203    uint64_t NumExpectedStreamBlocks =
204        StreamSize == UINT32_MAX
205            ? 0
206            : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207
208    // For convenience, we store the block array contiguously.  This is because
209    // if someone calls setStreamMap(), it is more convenient to be able to call
210    // it with an ArrayRef instead of setting up a StreamRef.  Since the
211    // DirectoryStream is cached in the class and thus lives for the life of the
212    // class, we can be guaranteed that readArray() will return a stable
213    // reference, even if it has to allocate from its internal pool.
214    ArrayRef<support::ulittle32_t> Blocks;
215    if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216      return EC;
217    for (uint32_t Block : Blocks) {
218      uint64_t BlockEndOffset =
219          (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220      if (BlockEndOffset > getFileSize())
221        return make_error<RawError>(raw_error_code::corrupt_file,
222                                    "Stream block map is corrupt.");
223    }
224    ContainerLayout.StreamMap.push_back(Blocks);
225  }
226
227  // We should have read exactly SB->NumDirectoryBytes bytes.
228  assert(Reader.bytesRemaining() == 0);
229  DirectoryStream = std::move(DS);
230  return Error::success();
231}
232
233ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
234  return ContainerLayout.DirectoryBlocks;
235}
236
237std::unique_ptr<MappedBlockStream>
238PDBFile::createIndexedStream(uint16_t SN) const {
239  if (SN == kInvalidStreamIndex)
240    return nullptr;
241  return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
242                                                Allocator);
243}
244
245MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
246  MSFStreamLayout Result;
247  auto Blocks = getStreamBlockList(StreamIdx);
248  Result.Blocks.assign(Blocks.begin(), Blocks.end());
249  Result.Length = getStreamByteSize(StreamIdx);
250  return Result;
251}
252
253msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
254  return msf::getFpmStreamLayout(ContainerLayout);
255}
256
257Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
258  if (!Globals) {
259    auto DbiS = getPDBDbiStream();
260    if (!DbiS)
261      return DbiS.takeError();
262
263    auto GlobalS =
264        safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
265    if (!GlobalS)
266      return GlobalS.takeError();
267    auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
268    if (auto EC = TempGlobals->reload())
269      return std::move(EC);
270    Globals = std::move(TempGlobals);
271  }
272  return *Globals;
273}
274
275Expected<InfoStream &> PDBFile::getPDBInfoStream() {
276  if (!Info) {
277    auto InfoS = safelyCreateIndexedStream(StreamPDB);
278    if (!InfoS)
279      return InfoS.takeError();
280    auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
281    if (auto EC = TempInfo->reload())
282      return std::move(EC);
283    Info = std::move(TempInfo);
284  }
285  return *Info;
286}
287
288Expected<DbiStream &> PDBFile::getPDBDbiStream() {
289  if (!Dbi) {
290    auto DbiS = safelyCreateIndexedStream(StreamDBI);
291    if (!DbiS)
292      return DbiS.takeError();
293    auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
294    if (auto EC = TempDbi->reload(this))
295      return std::move(EC);
296    Dbi = std::move(TempDbi);
297  }
298  return *Dbi;
299}
300
301Expected<TpiStream &> PDBFile::getPDBTpiStream() {
302  if (!Tpi) {
303    auto TpiS = safelyCreateIndexedStream(StreamTPI);
304    if (!TpiS)
305      return TpiS.takeError();
306    auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
307    if (auto EC = TempTpi->reload())
308      return std::move(EC);
309    Tpi = std::move(TempTpi);
310  }
311  return *Tpi;
312}
313
314Expected<TpiStream &> PDBFile::getPDBIpiStream() {
315  if (!Ipi) {
316    if (!hasPDBIpiStream())
317      return make_error<RawError>(raw_error_code::no_stream);
318
319    auto IpiS = safelyCreateIndexedStream(StreamIPI);
320    if (!IpiS)
321      return IpiS.takeError();
322    auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
323    if (auto EC = TempIpi->reload())
324      return std::move(EC);
325    Ipi = std::move(TempIpi);
326  }
327  return *Ipi;
328}
329
330Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
331  if (!Publics) {
332    auto DbiS = getPDBDbiStream();
333    if (!DbiS)
334      return DbiS.takeError();
335
336    auto PublicS =
337        safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
338    if (!PublicS)
339      return PublicS.takeError();
340    auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
341    if (auto EC = TempPublics->reload())
342      return std::move(EC);
343    Publics = std::move(TempPublics);
344  }
345  return *Publics;
346}
347
348Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
349  if (!Symbols) {
350    auto DbiS = getPDBDbiStream();
351    if (!DbiS)
352      return DbiS.takeError();
353
354    uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
355    auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
356    if (!SymbolS)
357      return SymbolS.takeError();
358
359    auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
360    if (auto EC = TempSymbols->reload())
361      return std::move(EC);
362    Symbols = std::move(TempSymbols);
363  }
364  return *Symbols;
365}
366
367Expected<PDBStringTable &> PDBFile::getStringTable() {
368  if (!Strings) {
369    auto NS = safelyCreateNamedStream("/names");
370    if (!NS)
371      return NS.takeError();
372
373    auto N = std::make_unique<PDBStringTable>();
374    BinaryStreamReader Reader(**NS);
375    if (auto EC = N->reload(Reader))
376      return std::move(EC);
377    assert(Reader.bytesRemaining() == 0);
378    StringTableStream = std::move(*NS);
379    Strings = std::move(N);
380  }
381  return *Strings;
382}
383
384Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
385  if (!InjectedSources) {
386    auto IJS = safelyCreateNamedStream("/src/headerblock");
387    if (!IJS)
388      return IJS.takeError();
389
390    auto Strings = getStringTable();
391    if (!Strings)
392      return Strings.takeError();
393
394    auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
395    if (auto EC = IJ->reload(*Strings))
396      return std::move(EC);
397    InjectedSources = std::move(IJ);
398  }
399  return *InjectedSources;
400}
401
402uint32_t PDBFile::getPointerSize() {
403  auto DbiS = getPDBDbiStream();
404  if (!DbiS)
405    return 0;
406  PDB_Machine Machine = DbiS->getMachineType();
407  if (Machine == PDB_Machine::Amd64)
408    return 8;
409  return 4;
410}
411
412bool PDBFile::hasPDBDbiStream() const {
413  return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
414}
415
416bool PDBFile::hasPDBGlobalsStream() {
417  auto DbiS = getPDBDbiStream();
418  if (!DbiS) {
419    consumeError(DbiS.takeError());
420    return false;
421  }
422
423  return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
424}
425
426bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
427
428bool PDBFile::hasPDBIpiStream() const {
429  if (!hasPDBInfoStream())
430    return false;
431
432  if (StreamIPI >= getNumStreams())
433    return false;
434
435  auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
436  return InfoStream.containsIdStream();
437}
438
439bool PDBFile::hasPDBPublicsStream() {
440  auto DbiS = getPDBDbiStream();
441  if (!DbiS) {
442    consumeError(DbiS.takeError());
443    return false;
444  }
445  return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
446}
447
448bool PDBFile::hasPDBSymbolStream() {
449  auto DbiS = getPDBDbiStream();
450  if (!DbiS)
451    return false;
452  return DbiS->getSymRecordStreamIndex() < getNumStreams();
453}
454
455bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
456
457bool PDBFile::hasPDBStringTable() {
458  auto IS = getPDBInfoStream();
459  if (!IS)
460    return false;
461  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
462  if (!ExpectedNSI) {
463    consumeError(ExpectedNSI.takeError());
464    return false;
465  }
466  assert(*ExpectedNSI < getNumStreams());
467  return true;
468}
469
470bool PDBFile::hasPDBInjectedSourceStream() {
471  auto IS = getPDBInfoStream();
472  if (!IS)
473    return false;
474  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
475  if (!ExpectedNSI) {
476    consumeError(ExpectedNSI.takeError());
477    return false;
478  }
479  assert(*ExpectedNSI < getNumStreams());
480  return true;
481}
482
483/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
484/// stream with that index actually exists.  If it does not, the return value
485/// will have an MSFError with code msf_error_code::no_stream.  Else, the return
486/// value will contain the stream returned by createIndexedStream().
487Expected<std::unique_ptr<MappedBlockStream>>
488PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
489  if (StreamIndex >= getNumStreams())
490    // This rejects kInvalidStreamIndex with an error as well.
491    return make_error<RawError>(raw_error_code::no_stream);
492  return createIndexedStream(StreamIndex);
493}
494
495Expected<std::unique_ptr<MappedBlockStream>>
496PDBFile::safelyCreateNamedStream(StringRef Name) {
497  auto IS = getPDBInfoStream();
498  if (!IS)
499    return IS.takeError();
500
501  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
502  if (!ExpectedNSI)
503    return ExpectedNSI.takeError();
504  uint32_t NameStreamIndex = *ExpectedNSI;
505
506  return safelyCreateIndexedStream(NameStreamIndex);
507}
508