1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_COFF_INPUT_FILES_H
10#define LLD_COFF_INPUT_FILES_H
11
12#include "Config.h"
13#include "lld/Common/LLVM.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/StringSet.h"
18#include "llvm/BinaryFormat/Magic.h"
19#include "llvm/Object/Archive.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Support/StringSaver.h"
22#include <memory>
23#include <set>
24#include <vector>
25
26namespace llvm {
27struct DILineInfo;
28namespace pdb {
29class DbiModuleDescriptorBuilder;
30class NativeSession;
31}
32namespace lto {
33class InputFile;
34}
35}
36
37namespace lld {
38class DWARFCache;
39
40namespace coff {
41class COFFLinkerContext;
42
43std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
44
45using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
46using llvm::COFF::MachineTypes;
47using llvm::object::Archive;
48using llvm::object::COFFObjectFile;
49using llvm::object::COFFSymbolRef;
50using llvm::object::coff_import_header;
51using llvm::object::coff_section;
52
53class Chunk;
54class Defined;
55class DefinedImportData;
56class DefinedImportThunk;
57class DefinedRegular;
58class SectionChunk;
59class Symbol;
60class Undefined;
61class TpiSource;
62
63// The root class of input files.
64class InputFile {
65public:
66  enum Kind {
67    ArchiveKind,
68    ObjectKind,
69    LazyObjectKind,
70    PDBKind,
71    ImportKind,
72    BitcodeKind,
73    DLLKind
74  };
75  Kind kind() const { return fileKind; }
76  virtual ~InputFile() {}
77
78  // Returns the filename.
79  StringRef getName() const { return mb.getBufferIdentifier(); }
80
81  // Reads a file (the constructor doesn't do that).
82  virtual void parse() = 0;
83
84  // Returns the CPU type this file was compiled to.
85  virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
86
87  MemoryBufferRef mb;
88
89  // An archive file name if this file is created from an archive.
90  StringRef parentName;
91
92  // Returns .drectve section contents if exist.
93  StringRef getDirectives() { return directives; }
94
95  COFFLinkerContext &ctx;
96
97protected:
98  InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false)
99      : mb(m), ctx(c), fileKind(k), lazy(lazy) {}
100
101  StringRef directives;
102
103private:
104  const Kind fileKind;
105
106public:
107  // True if this is a lazy ObjFile or BitcodeFile.
108  bool lazy = false;
109};
110
111// .lib or .a file.
112class ArchiveFile : public InputFile {
113public:
114  explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
115  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
116  void parse() override;
117
118  // Enqueues an archive member load for the given symbol. If we've already
119  // enqueued a load for the same archive member, this function does nothing,
120  // which ensures that we don't load the same member more than once.
121  void addMember(const Archive::Symbol &sym);
122
123private:
124  std::unique_ptr<Archive> file;
125  llvm::DenseSet<uint64_t> seen;
126};
127
128// .obj or .o file. This may be a member of an archive file.
129class ObjFile : public InputFile {
130public:
131  explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false)
132      : InputFile(ctx, ObjectKind, m, lazy) {}
133  static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
134  void parse() override;
135  void parseLazy();
136  MachineTypes getMachineType() override;
137  ArrayRef<Chunk *> getChunks() { return chunks; }
138  ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
139  ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
140  ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
141  ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
142  ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
143  ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
144  ArrayRef<Symbol *> getSymbols() { return symbols; }
145
146  MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
147
148  ArrayRef<uint8_t> getDebugSection(StringRef secName);
149
150  // Returns a Symbol object for the symbolIndex'th symbol in the
151  // underlying object file.
152  Symbol *getSymbol(uint32_t symbolIndex) {
153    return symbols[symbolIndex];
154  }
155
156  // Returns the underlying COFF file.
157  COFFObjectFile *getCOFFObj() { return coffObj.get(); }
158
159  // Add a symbol for a range extension thunk. Return the new symbol table
160  // index. This index can be used to modify a relocation.
161  uint32_t addRangeThunkSymbol(Symbol *thunk) {
162    symbols.push_back(thunk);
163    return symbols.size() - 1;
164  }
165
166  void includeResourceChunks();
167
168  bool isResourceObjFile() const { return !resourceChunks.empty(); }
169
170  // Flags in the absolute @feat.00 symbol if it is present. These usually
171  // indicate if an object was compiled with certain security features enabled
172  // like stack guard, safeseh, /guard:cf, or other things.
173  uint32_t feat00Flags = 0;
174
175  // True if this object file is compatible with SEH.  COFF-specific and
176  // x86-only. COFF spec 5.10.1. The .sxdata section.
177  bool hasSafeSEH() { return feat00Flags & 0x1; }
178
179  // True if this file was compiled with /guard:cf.
180  bool hasGuardCF() { return feat00Flags & 0x800; }
181
182  // True if this file was compiled with /guard:ehcont.
183  bool hasGuardEHCont() { return feat00Flags & 0x4000; }
184
185  // Pointer to the PDB module descriptor builder. Various debug info records
186  // will reference object files by "module index", which is here. Things like
187  // source files and section contributions are also recorded here. Will be null
188  // if we are not producing a PDB.
189  llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
190
191  const coff_section *addrsigSec = nullptr;
192
193  const coff_section *callgraphSec = nullptr;
194
195  // When using Microsoft precompiled headers, this is the PCH's key.
196  // The same key is used by both the precompiled object, and objects using the
197  // precompiled object. Any difference indicates out-of-date objects.
198  std::optional<uint32_t> pchSignature;
199
200  // Whether this file was compiled with /hotpatch.
201  bool hotPatchable = false;
202
203  // Whether the object was already merged into the final PDB.
204  bool mergedIntoPDB = false;
205
206  // If the OBJ has a .debug$T stream, this tells how it will be handled.
207  TpiSource *debugTypesObj = nullptr;
208
209  // The .debug$P or .debug$T section data if present. Empty otherwise.
210  ArrayRef<uint8_t> debugTypes;
211
212  std::optional<std::pair<StringRef, uint32_t>>
213  getVariableLocation(StringRef var);
214
215  std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
216                                                uint32_t sectionIndex);
217
218private:
219  const coff_section* getSection(uint32_t i);
220  const coff_section *getSection(COFFSymbolRef sym) {
221    return getSection(sym.getSectionNumber());
222  }
223
224  void enqueuePdbFile(StringRef path, ObjFile *fromFile);
225
226  void initializeChunks();
227  void initializeSymbols();
228  void initializeFlags();
229  void initializeDependencies();
230
231  SectionChunk *
232  readSection(uint32_t sectionNumber,
233              const llvm::object::coff_aux_section_definition *def,
234              StringRef leaderName);
235
236  void readAssociativeDefinition(
237      COFFSymbolRef coffSym,
238      const llvm::object::coff_aux_section_definition *def);
239
240  void readAssociativeDefinition(
241      COFFSymbolRef coffSym,
242      const llvm::object::coff_aux_section_definition *def,
243      uint32_t parentSection);
244
245  void recordPrevailingSymbolForMingw(
246      COFFSymbolRef coffSym,
247      llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
248
249  void maybeAssociateSEHForMingw(
250      COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
251      const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
252
253  // Given a new symbol Sym with comdat selection Selection, if the new
254  // symbol is not (yet) Prevailing and the existing comdat leader set to
255  // Leader, emits a diagnostic if the new symbol and its selection doesn't
256  // match the existing symbol and its selection. If either old or new
257  // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
258  // the existing leader. In that case, Prevailing is set to true.
259  void
260  handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
261                        bool &prevailing, DefinedRegular *leader,
262                        const llvm::object::coff_aux_section_definition *def);
263
264  std::optional<Symbol *>
265  createDefined(COFFSymbolRef sym,
266                std::vector<const llvm::object::coff_aux_section_definition *>
267                    &comdatDefs,
268                bool &prevailingComdat);
269  Symbol *createRegular(COFFSymbolRef sym);
270  Symbol *createUndefined(COFFSymbolRef sym);
271
272  std::unique_ptr<COFFObjectFile> coffObj;
273
274  // List of all chunks defined by this file. This includes both section
275  // chunks and non-section chunks for common symbols.
276  std::vector<Chunk *> chunks;
277
278  std::vector<SectionChunk *> resourceChunks;
279
280  // CodeView debug info sections.
281  std::vector<SectionChunk *> debugChunks;
282
283  // Chunks containing symbol table indices of exception handlers. Only used for
284  // 32-bit x86.
285  std::vector<SectionChunk *> sxDataChunks;
286
287  // Chunks containing symbol table indices of address taken symbols, address
288  // taken IAT entries, longjmp and ehcont targets. These are not linked into
289  // the final binary when /guard:cf is set.
290  std::vector<SectionChunk *> guardFidChunks;
291  std::vector<SectionChunk *> guardIATChunks;
292  std::vector<SectionChunk *> guardLJmpChunks;
293  std::vector<SectionChunk *> guardEHContChunks;
294
295  // This vector contains a list of all symbols defined or referenced by this
296  // file. They are indexed such that you can get a Symbol by symbol
297  // index. Nonexistent indices (which are occupied by auxiliary
298  // symbols in the real symbol table) are filled with null pointers.
299  std::vector<Symbol *> symbols;
300
301  // This vector contains the same chunks as Chunks, but they are
302  // indexed such that you can get a SectionChunk by section index.
303  // Nonexistent section indices are filled with null pointers.
304  // (Because section number is 1-based, the first slot is always a
305  // null pointer.) This vector is only valid during initialization.
306  std::vector<SectionChunk *> sparseChunks;
307
308  DWARFCache *dwarf = nullptr;
309};
310
311// This is a PDB type server dependency, that is not a input file per se, but
312// needs to be treated like one. Such files are discovered from the debug type
313// stream.
314class PDBInputFile : public InputFile {
315public:
316  explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
317  ~PDBInputFile();
318  static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
319  void parse() override;
320
321  static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
322                                          StringRef path, ObjFile *fromFile);
323
324  // Record possible errors while opening the PDB file
325  std::optional<std::string> loadErrorStr;
326
327  // This is the actual interface to the PDB (if it was opened successfully)
328  std::unique_ptr<llvm::pdb::NativeSession> session;
329
330  // If the PDB has a .debug$T stream, this tells how it will be handled.
331  TpiSource *debugTypesObj = nullptr;
332};
333
334// This type represents import library members that contain DLL names
335// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
336// for details about the format.
337class ImportFile : public InputFile {
338public:
339  explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m);
340
341  static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
342
343  Symbol *impSym = nullptr;
344  Symbol *thunkSym = nullptr;
345  std::string dllName;
346
347private:
348  void parse() override;
349
350public:
351  StringRef externalName;
352  const coff_import_header *hdr;
353  Chunk *location = nullptr;
354
355  // We want to eliminate dllimported symbols if no one actually refers to them.
356  // These "Live" bits are used to keep track of which import library members
357  // are actually in use.
358  //
359  // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
360  // symbols provided by this import library member. We also track whether the
361  // imported symbol is used separately from whether the thunk is used in order
362  // to avoid creating unnecessary thunks.
363  bool live;
364  bool thunkLive;
365};
366
367// Used for LTO.
368class BitcodeFile : public InputFile {
369public:
370  explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
371                       StringRef archiveName, uint64_t offsetInArchive,
372                       bool lazy);
373  ~BitcodeFile();
374  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
375  ArrayRef<Symbol *> getSymbols() { return symbols; }
376  MachineTypes getMachineType() override;
377  void parseLazy();
378  std::unique_ptr<llvm::lto::InputFile> obj;
379
380private:
381  void parse() override;
382
383  std::vector<Symbol *> symbols;
384};
385
386// .dll file. MinGW only.
387class DLLFile : public InputFile {
388public:
389  explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m)
390      : InputFile(ctx, DLLKind, m) {}
391  static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
392  void parse() override;
393  MachineTypes getMachineType() override;
394
395  struct Symbol {
396    StringRef dllName;
397    StringRef symbolName;
398    llvm::COFF::ImportNameType nameType;
399    llvm::COFF::ImportType importType;
400  };
401
402  void makeImport(Symbol *s);
403
404private:
405  std::unique_ptr<COFFObjectFile> coffObj;
406  llvm::StringSet<> seen;
407};
408
409inline bool isBitcode(MemoryBufferRef mb) {
410  return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
411}
412
413std::string replaceThinLTOSuffix(StringRef path, StringRef suffix,
414                                 StringRef repl);
415} // namespace coff
416
417std::string toString(const coff::InputFile *file);
418} // namespace lld
419
420#endif
421