1//===- MarkLive.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MarkLive.h"
10#include "Config.h"
11#include "OutputSegment.h"
12#include "SymbolTable.h"
13#include "Symbols.h"
14#include "UnwindInfoSection.h"
15
16#include "lld/Common/ErrorHandler.h"
17#include "llvm/Support/TimeProfiler.h"
18
19#include "mach-o/compact_unwind_encoding.h"
20
21namespace lld::macho {
22
23using namespace llvm;
24using namespace llvm::MachO;
25
26struct WhyLiveEntry {
27  InputSection *isec;
28  // Keep track of the entry that caused us to mark `isec` as live.
29  const WhyLiveEntry *prev;
30
31  WhyLiveEntry(InputSection *isec, const WhyLiveEntry *prev)
32      : isec(isec), prev(prev) {}
33};
34
35// Type-erased interface to MarkLiveImpl. Used for adding roots to the liveness
36// graph.
37class MarkLive {
38public:
39  virtual void enqueue(InputSection *isec, uint64_t off) = 0;
40  virtual void addSym(Symbol *s) = 0;
41  virtual void markTransitively() = 0;
42  virtual ~MarkLive() = default;
43};
44
45template <bool RecordWhyLive> class MarkLiveImpl : public MarkLive {
46public:
47  // -why_live is a rarely used option, so we don't want support for that flag
48  // to slow down the main -dead_strip code path. As such, we employ templates
49  // to avoid the usage of WhyLiveEntry in the main code path. This saves us
50  // from needless allocations and pointer indirections.
51  using WorklistEntry =
52      std::conditional_t<RecordWhyLive, WhyLiveEntry, InputSection>;
53
54  void enqueue(InputSection *isec, uint64_t off) override {
55    enqueue(isec, off, nullptr);
56  }
57  void addSym(Symbol *s) override { addSym(s, nullptr); }
58  void markTransitively() override;
59
60private:
61  void enqueue(InputSection *isec, uint64_t off, const WorklistEntry *prev);
62  void addSym(Symbol *s, const WorklistEntry *prev);
63  const InputSection *getInputSection(const WorklistEntry *) const;
64  WorklistEntry *makeEntry(InputSection *, const WorklistEntry *prev) const;
65
66  // We build up a worklist of sections which have been marked as live. We
67  // only push into the worklist when we discover an unmarked section, and we
68  // mark as we push, so sections never appear twice in the list. Literal
69  // sections cannot contain references to other sections, so we only store
70  // ConcatInputSections in our worklist.
71  SmallVector<WorklistEntry *, 256> worklist;
72};
73
74template <bool RecordWhyLive>
75void MarkLiveImpl<RecordWhyLive>::enqueue(
76    InputSection *isec, uint64_t off,
77    const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
78  if (isec->isLive(off))
79    return;
80  isec->markLive(off);
81  if (auto s = dyn_cast<ConcatInputSection>(isec)) {
82    assert(!s->isCoalescedWeak());
83    worklist.push_back(makeEntry(s, prev));
84  }
85}
86
87static void printWhyLive(const Symbol *s, const WhyLiveEntry *prev) {
88  std::string out = toString(*s) + " from " + toString(s->getFile());
89  int indent = 2;
90  for (const WhyLiveEntry *entry = prev; entry;
91       entry = entry->prev, indent += 2) {
92    const TinyPtrVector<Defined *> &symbols = entry->isec->symbols;
93    // With .subsections_with_symbols set, most isecs will have exactly one
94    // entry in their symbols vector, so we just print the first one.
95    if (!symbols.empty())
96      out += "\n" + std::string(indent, ' ') + toString(*symbols.front()) +
97             " from " + toString(symbols.front()->getFile());
98  }
99  message(out);
100}
101
102template <bool RecordWhyLive>
103void MarkLiveImpl<RecordWhyLive>::addSym(
104    Symbol *s,
105    const typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) {
106  if (s->used)
107    return;
108  s->used = true;
109  if constexpr (RecordWhyLive)
110    if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
111      printWhyLive(s, prev);
112  if (auto *d = dyn_cast<Defined>(s)) {
113    if (d->isec)
114      enqueue(d->isec, d->value, prev);
115    if (d->unwindEntry)
116      enqueue(d->unwindEntry, 0, prev);
117  }
118}
119
120template <bool RecordWhyLive>
121const InputSection *MarkLiveImpl<RecordWhyLive>::getInputSection(
122    const MarkLiveImpl<RecordWhyLive>::WorklistEntry *entry) const {
123  if constexpr (RecordWhyLive)
124    return entry->isec;
125  else
126    return entry;
127}
128
129template <bool RecordWhyLive>
130typename MarkLiveImpl<RecordWhyLive>::WorklistEntry *
131MarkLiveImpl<RecordWhyLive>::makeEntry(
132    InputSection *isec,
133    const MarkLiveImpl<RecordWhyLive>::WorklistEntry *prev) const {
134  if constexpr (RecordWhyLive) {
135    if (!isec) {
136      assert(!prev);
137      return nullptr;
138    }
139    return make<WhyLiveEntry>(isec, prev);
140  } else {
141    return isec;
142  }
143}
144
145template <bool RecordWhyLive>
146void MarkLiveImpl<RecordWhyLive>::markTransitively() {
147  do {
148    // Mark things reachable from GC roots as live.
149    while (!worklist.empty()) {
150      WorklistEntry *entry = worklist.pop_back_val();
151      // Entries that get placed onto the worklist always contain
152      // ConcatInputSections. `WhyLiveEntry::prev` may point to entries that
153      // contain other types of InputSections (due to S_ATTR_LIVE_SUPPORT), but
154      // those entries should never be pushed onto the worklist.
155      auto *isec = cast<ConcatInputSection>(getInputSection(entry));
156      assert(isec->live && "We mark as live when pushing onto the worklist!");
157
158      // Mark all symbols listed in the relocation table for this section.
159      for (const Reloc &r : isec->relocs) {
160        if (auto *s = r.referent.dyn_cast<Symbol *>())
161          addSym(s, entry);
162        else
163          enqueue(r.referent.get<InputSection *>(), r.addend, entry);
164      }
165      for (Defined *d : getInputSection(entry)->symbols)
166        addSym(d, entry);
167    }
168
169    // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live
170    // section. Process them in a second pass.
171    for (ConcatInputSection *isec : inputSections) {
172      // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a
173      // separate vector and only walking that here is faster.
174      if (!(isec->getFlags() & S_ATTR_LIVE_SUPPORT) || isec->live)
175        continue;
176
177      for (const Reloc &r : isec->relocs) {
178        if (auto *s = r.referent.dyn_cast<Symbol *>()) {
179          if (s->isLive()) {
180            InputSection *referentIsec = nullptr;
181            if (auto *d = dyn_cast<Defined>(s))
182              referentIsec = d->isec;
183            enqueue(isec, 0, makeEntry(referentIsec, nullptr));
184          }
185        } else {
186          auto *referentIsec = r.referent.get<InputSection *>();
187          if (referentIsec->isLive(r.addend))
188            enqueue(isec, 0, makeEntry(referentIsec, nullptr));
189        }
190      }
191    }
192
193    // S_ATTR_LIVE_SUPPORT could have marked additional sections live,
194    // which in turn could mark additional S_ATTR_LIVE_SUPPORT sections live.
195    // Iterate. In practice, the second iteration won't mark additional
196    // S_ATTR_LIVE_SUPPORT sections live.
197  } while (!worklist.empty());
198}
199
200// Set live bit on for each reachable chunk. Unmarked (unreachable)
201// InputSections will be ignored by Writer, so they will be excluded
202// from the final output.
203void markLive() {
204  TimeTraceScope timeScope("markLive");
205  MarkLive *marker;
206  if (config->whyLive.empty())
207    marker = make<MarkLiveImpl<false>>();
208  else
209    marker = make<MarkLiveImpl<true>>();
210  // Add GC roots.
211  if (config->entry)
212    marker->addSym(config->entry);
213  for (Symbol *sym : symtab->getSymbols()) {
214    if (auto *defined = dyn_cast<Defined>(sym)) {
215      // -exported_symbol(s_list)
216      if (!config->exportedSymbols.empty() &&
217          config->exportedSymbols.match(defined->getName())) {
218        // NOTE: Even though exporting private externs is an ill-defined
219        // operation, we are purposely not checking for privateExtern in
220        // order to follow ld64's behavior of treating all exported private
221        // extern symbols as live, irrespective of whether they are autohide.
222        marker->addSym(defined);
223        continue;
224      }
225
226      // public symbols explicitly marked .no_dead_strip
227      if (defined->referencedDynamically || defined->noDeadStrip) {
228        marker->addSym(defined);
229        continue;
230      }
231
232      // FIXME: When we implement these flags, make symbols from them GC
233      // roots:
234      // * -reexported_symbol(s_list)
235      // * -alias_list
236      // * -init
237
238      // In dylibs and bundles and in executables with -export_dynamic,
239      // all external functions are GC roots.
240      bool externsAreRoots =
241          config->outputType != MH_EXECUTE || config->exportDynamic;
242      if (externsAreRoots && !defined->privateExtern) {
243        marker->addSym(defined);
244        continue;
245      }
246    }
247  }
248  // -u symbols
249  for (Symbol *sym : config->explicitUndefineds)
250    marker->addSym(sym);
251  // local symbols explicitly marked .no_dead_strip
252  for (const InputFile *file : inputFiles)
253    if (auto *objFile = dyn_cast<ObjFile>(file))
254      for (Symbol *sym : objFile->symbols)
255        if (auto *defined = dyn_cast_or_null<Defined>(sym))
256          if (!defined->isExternal() && defined->noDeadStrip)
257            marker->addSym(defined);
258  if (auto *stubBinder =
259          dyn_cast_or_null<DylibSymbol>(symtab->find("dyld_stub_binder")))
260    marker->addSym(stubBinder);
261  for (ConcatInputSection *isec : inputSections) {
262    // Sections marked no_dead_strip
263    if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) {
264      marker->enqueue(isec, 0);
265      continue;
266    }
267
268    // mod_init_funcs, mod_term_funcs sections
269    if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
270        sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
271      assert(!config->emitInitOffsets ||
272             sectionType(isec->getFlags()) != S_MOD_INIT_FUNC_POINTERS);
273      marker->enqueue(isec, 0);
274      continue;
275    }
276  }
277
278  for (ConcatInputSection *isec : in.initOffsets->inputs())
279    marker->enqueue(isec, 0);
280
281  marker->markTransitively();
282}
283
284} // namespace lld::macho
285