1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYMBOLS_H
10#define LLD_MACHO_SYMBOLS_H
11
12#include "Config.h"
13#include "InputFiles.h"
14#include "Target.h"
15
16#include "llvm/Object/Archive.h"
17#include "llvm/Support/MathExtras.h"
18
19namespace lld {
20namespace macho {
21
22class MachHeaderSection;
23
24struct StringRefZ {
25  StringRefZ(const char *s) : data(s), size(-1) {}
26  StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
27
28  const char *data;
29  const uint32_t size;
30};
31
32class Symbol {
33public:
34  enum Kind {
35    DefinedKind,
36    UndefinedKind,
37    CommonKind,
38    DylibKind,
39    LazyArchiveKind,
40    LazyObjectKind,
41    AliasKind,
42  };
43
44  virtual ~Symbol() {}
45
46  Kind kind() const { return symbolKind; }
47
48  StringRef getName() const {
49    if (nameSize == (uint32_t)-1)
50      nameSize = strlen(nameData);
51    return {nameData, nameSize};
52  }
53
54  bool isLive() const { return used; }
55  bool isLazy() const {
56    return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
57  }
58
59  virtual uint64_t getVA() const { return 0; }
60
61  virtual bool isWeakDef() const { return false; }
62
63  // Only undefined or dylib symbols can be weak references. A weak reference
64  // need not be satisfied at runtime, e.g. due to the symbol not being
65  // available on a given target platform.
66  virtual bool isWeakRef() const { return false; }
67
68  virtual bool isTlv() const { return false; }
69
70  // Whether this symbol is in the GOT or TLVPointer sections.
71  bool isInGot() const { return gotIndex != UINT32_MAX; }
72
73  // Whether this symbol is in the StubsSection.
74  bool isInStubs() const { return stubsIndex != UINT32_MAX; }
75
76  uint64_t getStubVA() const;
77  uint64_t getLazyPtrVA() const;
78  uint64_t getGotVA() const;
79  uint64_t getTlvVA() const;
80  uint64_t resolveBranchVA() const {
81    assert(isa<Defined>(this) || isa<DylibSymbol>(this));
82    return isInStubs() ? getStubVA() : getVA();
83  }
84  uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
85  uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
86
87  // The index of this symbol in the GOT or the TLVPointer section, depending
88  // on whether it is a thread-local. A given symbol cannot be referenced by
89  // both these sections at once.
90  uint32_t gotIndex = UINT32_MAX;
91  uint32_t lazyBindOffset = UINT32_MAX;
92  uint32_t stubsHelperIndex = UINT32_MAX;
93  uint32_t stubsIndex = UINT32_MAX;
94  uint32_t symtabIndex = UINT32_MAX;
95
96  InputFile *getFile() const { return file; }
97
98protected:
99  Symbol(Kind k, StringRefZ name, InputFile *file)
100      : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
101        isUsedInRegularObj(!file || isa<ObjFile>(file)),
102        used(!config->deadStrip) {}
103
104  Kind symbolKind;
105  const char *nameData;
106  InputFile *file;
107  mutable uint32_t nameSize;
108
109public:
110  // True if this symbol was referenced by a regular (non-bitcode) object.
111  bool isUsedInRegularObj : 1;
112
113  // True if this symbol is used from a live section.
114  bool used : 1;
115};
116
117class Defined : public Symbol {
118public:
119  Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
120          uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
121          bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
122          bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
123          bool interposable = false);
124
125  bool isWeakDef() const override { return weakDef; }
126  bool isExternalWeakDef() const {
127    return isWeakDef() && isExternal() && !privateExtern;
128  }
129  bool isTlv() const override;
130
131  bool isExternal() const { return external; }
132  bool isAbsolute() const { return isec == nullptr; }
133
134  uint64_t getVA() const override;
135
136  // Returns the object file that this symbol was defined in. This value differs
137  // from `getFile()` if the symbol originated from a bitcode file.
138  ObjFile *getObjectFile() const;
139
140  std::string getSourceLocation();
141
142  // Ensure this symbol's pointers to InputSections point to their canonical
143  // copies.
144  void canonicalize();
145
146  static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
147
148  // Place the bitfields first so that they can get placed in the tail padding
149  // of the parent class, on platforms which support it.
150  bool overridesWeakDef : 1;
151  // Whether this symbol should appear in the output binary's export trie.
152  bool privateExtern : 1;
153  // Whether this symbol should appear in the output symbol table.
154  bool includeInSymtab : 1;
155  // Whether this symbol was folded into a different symbol during ICF.
156  bool wasIdenticalCodeFolded : 1;
157  // Symbols marked referencedDynamically won't be removed from the output's
158  // symbol table by tools like strip. In theory, this could be set on arbitrary
159  // symbols in input object files. In practice, it's used solely for the
160  // synthetic __mh_execute_header symbol.
161  // This is information for the static linker, and it's also written to the
162  // output file's symbol table for tools running later (such as `strip`).
163  bool referencedDynamically : 1;
164  // Set on symbols that should not be removed by dead code stripping.
165  // Set for example on `__attribute__((used))` globals, or on some Objective-C
166  // metadata. This is information only for the static linker and not written
167  // to the output.
168  bool noDeadStrip : 1;
169  // Whether references to this symbol can be interposed at runtime to point to
170  // a different symbol definition (with the same name). For example, if both
171  // dylib A and B define an interposable symbol _foo, and we load A before B at
172  // runtime, then all references to _foo within dylib B will point to the
173  // definition in dylib A.
174  //
175  // Only extern symbols may be interposable.
176  bool interposable : 1;
177
178  bool weakDefCanBeHidden : 1;
179
180private:
181  const bool weakDef : 1;
182  const bool external : 1;
183
184public:
185  InputSection *isec;
186  // Contains the offset from the containing subsection. Note that this is
187  // different from nlist::n_value, which is the absolute address of the symbol.
188  uint64_t value;
189  // size is only calculated for regular (non-bitcode) symbols.
190  uint64_t size;
191  // This can be a subsection of either __compact_unwind or __eh_frame.
192  ConcatInputSection *unwindEntry = nullptr;
193};
194
// Reference strength, used in two places. On a dylib symbol it records
// whether, and how, the symbol is referenced. On a DylibFile it records what
// kind of referenced symbols the file contains; a file that is referenced both
// weakly and strongly counts as strongly referenced. The enumerators are
// ordered from weakest to strongest so that states can be compared.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
201
202class Undefined : public Symbol {
203public:
204  Undefined(StringRefZ name, InputFile *file, RefState refState,
205            bool wasBitcodeSymbol)
206      : Symbol(UndefinedKind, name, file), refState(refState),
207        wasBitcodeSymbol(wasBitcodeSymbol) {
208    assert(refState != RefState::Unreferenced);
209  }
210
211  bool isWeakRef() const override { return refState == RefState::Weak; }
212
213  static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
214
215  RefState refState : 2;
216  bool wasBitcodeSymbol;
217};
218
219// On Unix, it is traditionally allowed to write variable definitions without
220// initialization expressions (such as "int foo;") to header files. These are
221// called tentative definitions.
222//
223// Using tentative definitions is usually considered a bad practice; you should
224// write only declarations (such as "extern int foo;") to header files.
225// Nevertheless, the linker and the compiler have to do something to support
226// bad code by allowing duplicate definitions for this particular case.
227//
228// The compiler creates common symbols when it sees tentative definitions.
229// (You can suppress this behavior and let the compiler create a regular
230// defined symbol by passing -fno-common. -fno-common is the default in clang
231// as of LLVM 11.0.) When linking the final binary, if there are remaining
232// common symbols after name resolution is complete, the linker converts them
233// to regular defined symbols in a __common section.
234class CommonSymbol : public Symbol {
235public:
236  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
237               bool isPrivateExtern)
238      : Symbol(CommonKind, name, file), size(size),
239        align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
240        privateExtern(isPrivateExtern) {
241    // TODO: cap maximum alignment
242  }
243
244  static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
245
246  const uint64_t size;
247  const uint32_t align;
248  const bool privateExtern;
249};
250
251class DylibSymbol : public Symbol {
252public:
253  DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
254              RefState refState, bool isTlv)
255      : Symbol(DylibKind, name, file), shouldReexport(false),
256        refState(refState), weakDef(isWeakDef), tlv(isTlv) {
257    if (file && refState > RefState::Unreferenced)
258      file->numReferencedSymbols++;
259  }
260
261  uint64_t getVA() const override;
262  bool isWeakDef() const override { return weakDef; }
263
264  // Symbols from weak libraries/frameworks are also weakly-referenced.
265  bool isWeakRef() const override {
266    return refState == RefState::Weak ||
267           (file && getFile()->umbrella->forceWeakImport);
268  }
269  bool isReferenced() const { return refState != RefState::Unreferenced; }
270  bool isTlv() const override { return tlv; }
271  bool isDynamicLookup() const { return file == nullptr; }
272  bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
273
274  DylibFile *getFile() const {
275    assert(!isDynamicLookup());
276    return cast<DylibFile>(file);
277  }
278
279  static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
280
281  RefState getRefState() const { return refState; }
282
283  void reference(RefState newState) {
284    assert(newState > RefState::Unreferenced);
285    if (refState == RefState::Unreferenced && file)
286      getFile()->numReferencedSymbols++;
287    refState = std::max(refState, newState);
288  }
289
290  void unreference() {
291    // dynamic_lookup symbols have no file.
292    if (refState > RefState::Unreferenced && file) {
293      assert(getFile()->numReferencedSymbols > 0);
294      getFile()->numReferencedSymbols--;
295    }
296  }
297
298  bool shouldReexport : 1;
299private:
300  RefState refState : 2;
301  const bool weakDef : 1;
302  const bool tlv : 1;
303};
304
305class LazyArchive : public Symbol {
306public:
307  LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
308      : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
309
310  ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
311  void fetchArchiveMember();
312
313  static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
314
315private:
316  const llvm::object::Archive::Symbol sym;
317};
318
319// A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
320// --end-lib.
321class LazyObject : public Symbol {
322public:
323  LazyObject(InputFile &file, StringRef name)
324      : Symbol(LazyObjectKind, name, &file) {
325    isUsedInRegularObj = false;
326  }
327
328  static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
329};
330
331// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
332// then all AliasSymbol instances will be converted into one of the other Symbol
333// types after `createAliases()` runs.
334class AliasSymbol final : public Symbol {
335public:
336  AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
337              bool isPrivateExtern)
338      : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
339        aliasedName(aliasedName) {}
340
341  StringRef getAliasedName() const { return aliasedName; }
342
343  static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
344
345  const bool privateExtern;
346
347private:
348  StringRef aliasedName;
349};
350
351union SymbolUnion {
352  alignas(Defined) char a[sizeof(Defined)];
353  alignas(Undefined) char b[sizeof(Undefined)];
354  alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
355  alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
356  alignas(LazyArchive) char e[sizeof(LazyArchive)];
357  alignas(LazyObject) char f[sizeof(LazyObject)];
358  alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
359};
360
361template <typename T, typename... ArgT>
362T *replaceSymbol(Symbol *s, ArgT &&...arg) {
363  static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
364  static_assert(alignof(T) <= alignof(SymbolUnion),
365                "SymbolUnion not aligned enough");
366  assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
367         "Not a Symbol");
368
369  bool isUsedInRegularObj = s->isUsedInRegularObj;
370  bool used = s->used;
371  T *sym = new (s) T(std::forward<ArgT>(arg)...);
372  sym->isUsedInRegularObj |= isUsedInRegularObj;
373  sym->used |= used;
374  return sym;
375}
376
377// Can a symbol's address only be resolved at runtime?
378inline bool needsBinding(const Symbol *sym) {
379  if (isa<DylibSymbol>(sym))
380    return true;
381  if (const auto *defined = dyn_cast<Defined>(sym))
382    return defined->isExternalWeakDef() || defined->interposable;
383  return false;
384}
385
386// Symbols with `l` or `L` as a prefix are linker-private and never appear in
387// the output.
388inline bool isPrivateLabel(StringRef name) {
389  return name.starts_with("l") || name.starts_with("L");
390}
391} // namespace macho
392
393std::string toString(const macho::Symbol &);
394std::string toMachOString(const llvm::object::Archive::Symbol &);
395
396} // namespace lld
397
398#endif
399