1//===- InputSection.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_INPUT_SECTION_H
10#define LLD_MACHO_INPUT_SECTION_H
11
12#include "Config.h"
13#include "Relocations.h"
14#include "Symbols.h"
15
16#include "lld/Common/LLVM.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/CachedHashString.h"
21#include "llvm/ADT/TinyPtrVector.h"
22#include "llvm/BinaryFormat/MachO.h"
23
24namespace lld {
25namespace macho {
26
27class InputFile;
28class OutputSection;
29
30class InputSection {
31public:
32  enum Kind : uint8_t {
33    ConcatKind,
34    CStringLiteralKind,
35    WordLiteralKind,
36  };
37
38  Kind kind() const { return sectionKind; }
39  virtual ~InputSection() = default;
40  virtual uint64_t getSize() const { return data.size(); }
41  virtual bool empty() const { return data.empty(); }
42  InputFile *getFile() const { return section.file; }
43  StringRef getName() const { return section.name; }
44  StringRef getSegName() const { return section.segname; }
45  uint32_t getFlags() const { return section.flags; }
46  uint64_t getFileSize() const;
47  // Translates \p off -- an offset relative to this InputSection -- into an
48  // offset from the beginning of its parent OutputSection.
49  virtual uint64_t getOffset(uint64_t off) const = 0;
50  // The offset from the beginning of the file.
51  uint64_t getVA(uint64_t off) const;
52  // Return a user-friendly string for use in diagnostics.
53  // Format: /path/to/object.o:(symbol _func+0x123)
54  std::string getLocation(uint64_t off) const;
55  // Return the source line corresponding to an address, or the empty string.
56  // Format: Source.cpp:123 (/path/to/Source.cpp:123)
57  std::string getSourceLocation(uint64_t off) const;
58  // Return the relocation at \p off, if it exists. This does a linear search.
59  const Reloc *getRelocAt(uint32_t off) const;
60  // Whether the data at \p off in this InputSection is live.
61  virtual bool isLive(uint64_t off) const = 0;
62  virtual void markLive(uint64_t off) = 0;
63  virtual InputSection *canonical() { return this; }
64  virtual const InputSection *canonical() const { return this; }
65
66protected:
67  InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
68               uint32_t align)
69      : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
70        data(data), section(section) {}
71
72  InputSection(const InputSection &rhs)
73      : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
74        align(rhs.align), data(rhs.data), section(rhs.section) {}
75
76  Kind sectionKind;
77
78public:
79  // is address assigned?
80  bool isFinal = false;
81  // keep the address of the symbol(s) in this section unique in the final
82  // binary ?
83  bool keepUnique : 1;
84  // Does this section have symbols at offsets other than zero? (NOTE: only
85  // applies to ConcatInputSections.)
86  bool hasAltEntry : 1;
87  uint32_t align = 1;
88
89  OutputSection *parent = nullptr;
90  ArrayRef<uint8_t> data;
91  std::vector<Reloc> relocs;
92  // The symbols that belong to this InputSection, sorted by value. With
93  // .subsections_via_symbols, there is typically only one element here.
94  llvm::TinyPtrVector<Defined *> symbols;
95
96protected:
97  const Section &section;
98
99  const Defined *getContainingSymbol(uint64_t off) const;
100};
101
102// ConcatInputSections are combined into (Concat)OutputSections through simple
103// concatenation, in contrast with literal sections which may have their
104// contents merged before output.
105class ConcatInputSection final : public InputSection {
106public:
107  ConcatInputSection(const Section &section, ArrayRef<uint8_t> data,
108                     uint32_t align = 1)
109      : InputSection(ConcatKind, section, data, align) {}
110
111  uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
112  uint64_t getVA() const { return InputSection::getVA(0); }
113  // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
114  bool isLive(uint64_t off) const override { return live; }
115  void markLive(uint64_t off) override { live = true; }
116  bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
117  bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
118  void writeTo(uint8_t *buf);
119
120  void foldIdentical(ConcatInputSection *redundant);
121  ConcatInputSection *canonical() override {
122    return replacement ? replacement : this;
123  }
124  const InputSection *canonical() const override {
125    return replacement ? replacement : this;
126  }
127
128  static bool classof(const InputSection *isec) {
129    return isec->kind() == ConcatKind;
130  }
131
132  // Points to the surviving section after this one is folded by ICF
133  ConcatInputSection *replacement = nullptr;
134  // Equivalence-class ID for ICF
135  uint32_t icfEqClass[2] = {0, 0};
136
137  // With subsections_via_symbols, most symbols have their own InputSection,
138  // and for weak symbols (e.g. from inline functions), only the
139  // InputSection from one translation unit will make it to the output,
140  // while all copies in other translation units are coalesced into the
141  // first and not copied to the output.
142  bool wasCoalesced = false;
143  bool live = !config->deadStrip;
144  bool hasCallSites = false;
145  // This variable has two usages. Initially, it represents the input order.
146  // After assignAddresses is called, it represents the offset from the
147  // beginning of the output section this section was assigned to.
148  uint64_t outSecOff = 0;
149};
150
151// Initialize a fake InputSection that does not belong to any InputFile.
152ConcatInputSection *makeSyntheticInputSection(StringRef segName,
153                                              StringRef sectName,
154                                              uint32_t flags = 0,
155                                              ArrayRef<uint8_t> data = {},
156                                              uint32_t align = 1);
157
158// Helper functions to make it easy to sprinkle asserts.
159
160inline bool shouldOmitFromOutput(InputSection *isec) {
161  return isa<ConcatInputSection>(isec) &&
162         cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
163}
164
165inline bool isCoalescedWeak(InputSection *isec) {
166  return isa<ConcatInputSection>(isec) &&
167         cast<ConcatInputSection>(isec)->isCoalescedWeak();
168}
169
170// We allocate a lot of these and binary search on them, so they should be as
171// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
172struct StringPiece {
173  // Offset from the start of the containing input section.
174  uint32_t inSecOff;
175  uint32_t live : 1;
176  // Only set if deduplicating literals
177  uint32_t hash : 31;
178  // Offset from the start of the containing output section.
179  uint64_t outSecOff = 0;
180
181  StringPiece(uint64_t off, uint32_t hash)
182      : inSecOff(off), live(!config->deadStrip), hash(hash) {}
183};
184
185static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
186
187// CStringInputSections are composed of multiple null-terminated string
188// literals, which we represent using StringPieces. These literals can be
// deduplicated and tail-merged, so translating offsets between the input and
// output sections is more complicated.
191//
192// NOTE: One significant difference between LLD and ld64 is that we merge all
193// cstring literals, even those referenced directly by non-private symbols.
194// ld64 is more conservative and does not do that. This was mostly done for
195// implementation simplicity; if we find programs that need the more
196// conservative behavior we can certainly implement that.
197class CStringInputSection final : public InputSection {
198public:
199  CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
200                      uint32_t align, bool dedupLiterals)
201      : InputSection(CStringLiteralKind, section, data, align),
202        deduplicateLiterals(dedupLiterals) {}
203
204  uint64_t getOffset(uint64_t off) const override;
205  bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
206  void markLive(uint64_t off) override { getStringPiece(off).live = true; }
207  // Find the StringPiece that contains this offset.
208  StringPiece &getStringPiece(uint64_t off);
209  const StringPiece &getStringPiece(uint64_t off) const;
210  // Split at each null byte.
211  void splitIntoPieces();
212
213  LLVM_ATTRIBUTE_ALWAYS_INLINE
214  StringRef getStringRef(size_t i) const {
215    size_t begin = pieces[i].inSecOff;
216    // The endpoint should be *at* the null terminator, not after. This matches
217    // the behavior of StringRef(const char *Str).
218    size_t end =
219        ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
220    return toStringRef(data.slice(begin, end - begin));
221  }
222
223  StringRef getStringRefAtOffset(uint64_t off) const {
224    return getStringRef(getStringPieceIndex(off));
225  }
226
227  // Returns i'th piece as a CachedHashStringRef. This function is very hot when
228  // string merging is enabled, so we want to inline.
229  LLVM_ATTRIBUTE_ALWAYS_INLINE
230  llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
231    assert(deduplicateLiterals);
232    return {getStringRef(i), pieces[i].hash};
233  }
234
235  static bool classof(const InputSection *isec) {
236    return isec->kind() == CStringLiteralKind;
237  }
238
239  bool deduplicateLiterals = false;
240  std::vector<StringPiece> pieces;
241
242private:
243  size_t getStringPieceIndex(uint64_t off) const;
244};
245
246class WordLiteralInputSection final : public InputSection {
247public:
248  WordLiteralInputSection(const Section &section, ArrayRef<uint8_t> data,
249                          uint32_t align);
250  uint64_t getOffset(uint64_t off) const override;
251  bool isLive(uint64_t off) const override {
252    return live[off >> power2LiteralSize];
253  }
254  void markLive(uint64_t off) override {
255    live[off >> power2LiteralSize] = true;
256  }
257
258  static bool classof(const InputSection *isec) {
259    return isec->kind() == WordLiteralKind;
260  }
261
262private:
263  unsigned power2LiteralSize;
264  // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
265  llvm::BitVector live;
266};
267
268inline uint8_t sectionType(uint32_t flags) {
269  return flags & llvm::MachO::SECTION_TYPE;
270}
271
272inline bool isZeroFill(uint32_t flags) {
273  return llvm::MachO::isVirtualSection(sectionType(flags));
274}
275
276inline bool isThreadLocalVariables(uint32_t flags) {
277  return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
278}
279
280// These sections contain the data for initializing thread-local variables.
281inline bool isThreadLocalData(uint32_t flags) {
282  return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
283         sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
284}
285
286inline bool isDebugSection(uint32_t flags) {
287  return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
288         llvm::MachO::S_ATTR_DEBUG;
289}
290
291inline bool isWordLiteralSection(uint32_t flags) {
292  return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
293         sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
294         sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
295}
296
297bool isCodeSection(const InputSection *);
298bool isCfStringSection(const InputSection *);
299bool isClassRefsSection(const InputSection *);
300bool isSelRefsSection(const InputSection *);
301bool isEhFrameSection(const InputSection *);
302bool isGccExceptTabSection(const InputSection *);
303
304extern std::vector<ConcatInputSection *> inputSections;
305
// Section-name string constants. `constexpr` already makes each array const,
// so the element type is `const char` without spelling it out a second time.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
371
372} // namespace macho
373
374std::string toString(const macho::InputSection *);
375
376} // namespace lld
377
378#endif
379