File.h revision 360784
1//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_READER_WRITER_MACHO_FILE_H
10#define LLD_READER_WRITER_MACHO_FILE_H
11
12#include "Atoms.h"
13#include "DebugInfo.h"
14#include "MachONormalizedFile.h"
15#include "lld/Core/SharedLibraryFile.h"
16#include "lld/Core/Simple.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/StringMap.h"
19#include "llvm/Support/Format.h"
20#include <unordered_map>
21
22namespace lld {
23namespace mach_o {
24
25using lld::mach_o::normalized::Section;
26
27class MachOFile : public SimpleFile {
28public:
29
30  /// Real file constructor - for on-disk files.
31  MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
32    : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject),
33      _mb(std::move(mb)), _ctx(ctx) {}
34
35  /// Dummy file constructor - for virtual files.
36  MachOFile(StringRef path)
37    : SimpleFile(path, File::kindMachObject) {}
38
39  void addDefinedAtom(StringRef name, Atom::Scope scope,
40                      DefinedAtom::ContentType type, DefinedAtom::Merge merge,
41                      uint64_t sectionOffset, uint64_t contentSize, bool thumb,
42                      bool noDeadStrip, bool copyRefs,
43                      const Section *inSection) {
44    assert(sectionOffset+contentSize <= inSection->content.size());
45    ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset,
46                                                        contentSize);
47    if (copyRefs) {
48      // Make a copy of the atom's name and content that is owned by this file.
49      name = name.copy(allocator());
50      content = content.copy(allocator());
51    }
52    DefinedAtom::Alignment align(
53        inSection->alignment,
54        sectionOffset % inSection->alignment);
55    auto *atom =
56        new (allocator()) MachODefinedAtom(*this, name, scope, type, merge,
57                                           thumb, noDeadStrip, content, align);
58    addAtomForSection(inSection, atom, sectionOffset);
59  }
60
61  void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope,
62                      DefinedAtom::ContentType type, DefinedAtom::Merge merge,
63                      bool thumb, bool noDeadStrip, uint64_t sectionOffset,
64                      uint64_t contentSize, StringRef sectionName,
65                      bool copyRefs, const Section *inSection) {
66    assert(sectionOffset+contentSize <= inSection->content.size());
67    ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset,
68                                                        contentSize);
69   if (copyRefs) {
70      // Make a copy of the atom's name and content that is owned by this file.
71      name = name.copy(allocator());
72      content = content.copy(allocator());
73      sectionName = sectionName.copy(allocator());
74    }
75    DefinedAtom::Alignment align(
76        inSection->alignment,
77        sectionOffset % inSection->alignment);
78    auto *atom =
79        new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type,
80                                                        merge, thumb,
81                                                        noDeadStrip, content,
82                                                        sectionName, align);
83    addAtomForSection(inSection, atom, sectionOffset);
84  }
85
86  void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope,
87                              uint64_t sectionOffset, uint64_t size,
88                              bool noDeadStrip, bool copyRefs,
89                              const Section *inSection) {
90    if (copyRefs) {
91      // Make a copy of the atom's name and content that is owned by this file.
92      name = name.copy(allocator());
93    }
94    DefinedAtom::Alignment align(
95        inSection->alignment,
96        sectionOffset % inSection->alignment);
97
98    DefinedAtom::ContentType type = DefinedAtom::typeUnknown;
99    switch (inSection->type) {
100    case llvm::MachO::S_ZEROFILL:
101      type = DefinedAtom::typeZeroFill;
102      break;
103    case llvm::MachO::S_THREAD_LOCAL_ZEROFILL:
104      type = DefinedAtom::typeTLVInitialZeroFill;
105      break;
106    default:
107      llvm_unreachable("Unrecognized zero-fill section");
108    }
109
110    auto *atom =
111        new (allocator()) MachODefinedAtom(*this, name, scope, type, size,
112                                           noDeadStrip, align);
113    addAtomForSection(inSection, atom, sectionOffset);
114  }
115
116  void addUndefinedAtom(StringRef name, bool copyRefs) {
117    if (copyRefs) {
118      // Make a copy of the atom's name that is owned by this file.
119      name = name.copy(allocator());
120    }
121    auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name);
122    addAtom(*atom);
123    _undefAtoms[name] = atom;
124  }
125
126  void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size,
127                           DefinedAtom::Alignment align, bool copyRefs) {
128    if (copyRefs) {
129      // Make a copy of the atom's name that is owned by this file.
130      name = name.copy(allocator());
131    }
132    auto *atom =
133        new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align);
134    addAtom(*atom);
135    _undefAtoms[name] = atom;
136  }
137
138  /// Search this file for the atom from 'section' that covers
139  /// 'offsetInSect'.  Returns nullptr is no atom found.
140  MachODefinedAtom *findAtomCoveringAddress(const Section &section,
141                                            uint64_t offsetInSect,
142                                            uint32_t *foundOffsetAtom=nullptr) {
143    const auto &pos = _sectionAtoms.find(&section);
144    if (pos == _sectionAtoms.end())
145      return nullptr;
146    const auto &vec = pos->second;
147    assert(offsetInSect < section.content.size());
148    // Vector of atoms for section are already sorted, so do binary search.
149    const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect,
150        [offsetInSect](const SectionOffsetAndAtom &ao,
151                       uint64_t targetAddr) -> bool {
152          // Each atom has a start offset of its slice of the
153          // section's content. This compare function must return true
154          // iff the atom's range is before the offset being searched for.
155          uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size();
156          return (atomsEndOffset <= offsetInSect);
157        });
158    if (atomPos == vec.end())
159      return nullptr;
160    if (foundOffsetAtom)
161      *foundOffsetAtom = offsetInSect - atomPos->offset;
162    return atomPos->atom;
163  }
164
165  /// Searches this file for an UndefinedAtom named 'name'. Returns
166  /// nullptr is no such atom found.
167  const lld::Atom *findUndefAtom(StringRef name) {
168    auto pos = _undefAtoms.find(name);
169    if (pos == _undefAtoms.end())
170      return nullptr;
171    return pos->second;
172  }
173
174  typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor;
175
176  void eachDefinedAtom(DefinedAtomVisitor vistor) {
177    for (auto &sectAndAtoms : _sectionAtoms) {
178      for (auto &offAndAtom : sectAndAtoms.second) {
179        vistor(offAndAtom.atom);
180      }
181    }
182  }
183
184  typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)>
185      SectionAtomVisitor;
186
187  void eachAtomInSection(const Section &section, SectionAtomVisitor visitor) {
188    auto pos = _sectionAtoms.find(&section);
189    if (pos == _sectionAtoms.end())
190      return;
191    auto vec = pos->second;
192
193    for (auto &offAndAtom : vec)
194      visitor(offAndAtom.atom, offAndAtom.offset);
195  }
196
197  MachOLinkingContext::Arch arch() const { return _arch; }
198  void setArch(MachOLinkingContext::Arch arch) { _arch = arch; }
199
200  MachOLinkingContext::OS OS() const { return _os; }
201  void setOS(MachOLinkingContext::OS os) { _os = os; }
202
203  MachOLinkingContext::ObjCConstraint objcConstraint() const {
204    return _objcConstraint;
205  }
206  void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) {
207    _objcConstraint = v;
208  }
209
210  uint32_t minVersion() const { return _minVersion; }
211  void setMinVersion(uint32_t v) { _minVersion = v; }
212
213  LoadCommandType minVersionLoadCommandKind() const {
214    return _minVersionLoadCommandKind;
215  }
216  void setMinVersionLoadCommandKind(LoadCommandType v) {
217    _minVersionLoadCommandKind = v;
218  }
219
220  uint32_t swiftVersion() const { return _swiftVersion; }
221  void setSwiftVersion(uint32_t v) { _swiftVersion = v; }
222
223  bool subsectionsViaSymbols() const {
224    return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
225  }
226  void setFlags(normalized::FileFlags v) { _flags = v; }
227
228  /// Methods for support type inquiry through isa, cast, and dyn_cast:
229  static inline bool classof(const File *F) {
230    return F->kind() == File::kindMachObject;
231  }
232
233  void setDebugInfo(std::unique_ptr<DebugInfo> debugInfo) {
234    _debugInfo = std::move(debugInfo);
235  }
236
237  DebugInfo* debugInfo() const { return _debugInfo.get(); }
238  std::unique_ptr<DebugInfo> takeDebugInfo() { return std::move(_debugInfo); }
239
240protected:
241  std::error_code doParse() override {
242    // Convert binary file to normalized mach-o.
243    auto normFile = normalized::readBinary(_mb, _ctx->arch());
244    if (auto ec = normFile.takeError())
245      return llvm::errorToErrorCode(std::move(ec));
246    // Convert normalized mach-o to atoms.
247    if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false))
248      return llvm::errorToErrorCode(std::move(ec));
249    return std::error_code();
250  }
251
252private:
253  struct SectionOffsetAndAtom { uint64_t offset;  MachODefinedAtom *atom; };
254
255  void addAtomForSection(const Section *inSection, MachODefinedAtom* atom,
256                         uint64_t sectionOffset) {
257    SectionOffsetAndAtom offAndAtom;
258    offAndAtom.offset = sectionOffset;
259    offAndAtom.atom   = atom;
260     _sectionAtoms[inSection].push_back(offAndAtom);
261    addAtom(*atom);
262  }
263
264  typedef llvm::DenseMap<const normalized::Section *,
265                         std::vector<SectionOffsetAndAtom>>  SectionToAtoms;
266  typedef llvm::StringMap<const lld::Atom *> NameToAtom;
267
268  std::unique_ptr<MemoryBuffer> _mb;
269  MachOLinkingContext          *_ctx;
270  SectionToAtoms                _sectionAtoms;
271  NameToAtom                     _undefAtoms;
272  MachOLinkingContext::Arch      _arch = MachOLinkingContext::arch_unknown;
273  MachOLinkingContext::OS        _os = MachOLinkingContext::OS::unknown;
274  uint32_t                       _minVersion = 0;
275  LoadCommandType               _minVersionLoadCommandKind = (LoadCommandType)0;
276  MachOLinkingContext::ObjCConstraint _objcConstraint =
277      MachOLinkingContext::objc_unknown;
278  uint32_t                       _swiftVersion = 0;
279  normalized::FileFlags        _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
280  std::unique_ptr<DebugInfo>   _debugInfo;
281};
282
283class MachODylibFile : public SharedLibraryFile {
284public:
285  MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx)
286      : SharedLibraryFile(mb->getBufferIdentifier()),
287        _mb(std::move(mb)), _ctx(ctx) {}
288
289  MachODylibFile(StringRef path) : SharedLibraryFile(path) {}
290
291  OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override {
292    // Pass down _installName so that if this requested symbol
293    // is re-exported through this dylib, the SharedLibraryAtom's loadName()
294    // is this dylib installName and not the implementation dylib's.
295    // NOTE: isData is not needed for dylibs (it matters for static libs).
296    return exports(name, _installName);
297  }
298
299  /// Adds symbol name that this dylib exports. The corresponding
300  /// SharedLibraryAtom is created lazily (since most symbols are not used).
301  void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) {
302    if (copyRefs) {
303      name = name.copy(allocator());
304    }
305    AtomAndFlags info(weakDef);
306    _nameToAtom[name] = info;
307  }
308
309  void addReExportedDylib(StringRef dylibPath) {
310    _reExportedDylibs.emplace_back(dylibPath);
311  }
312
313  StringRef installName() const { return _installName; }
314  uint32_t currentVersion() { return _currentVersion; }
315  uint32_t compatVersion() { return _compatVersion; }
316
317  void setInstallName(StringRef name) { _installName = name; }
318  void setCompatVersion(uint32_t version) { _compatVersion = version; }
319  void setCurrentVersion(uint32_t version) { _currentVersion = version; }
320
321  typedef std::function<MachODylibFile *(StringRef)> FindDylib;
322
323  void loadReExportedDylibs(FindDylib find) {
324    for (ReExportedDylib &entry : _reExportedDylibs) {
325      entry.file = find(entry.path);
326    }
327  }
328
329  StringRef getDSOName() const override { return _installName; }
330
331  std::error_code doParse() override {
332    // Convert binary file to normalized mach-o.
333    auto normFile = normalized::readBinary(_mb, _ctx->arch());
334    if (auto ec = normFile.takeError())
335      return llvm::errorToErrorCode(std::move(ec));
336    // Convert normalized mach-o to atoms.
337    if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false))
338      return llvm::errorToErrorCode(std::move(ec));
339    return std::error_code();
340  }
341
342private:
343  OwningAtomPtr<SharedLibraryAtom> exports(StringRef name,
344                                   StringRef installName) const {
345    // First, check if requested symbol is directly implemented by this dylib.
346    auto entry = _nameToAtom.find(name);
347    if (entry != _nameToAtom.end()) {
348      // FIXME: Make this map a set and only used in assert builds.
349      // Note, its safe to assert here as the resolver is the only client of
350      // this API and it only requests exports for undefined symbols.
351      // If we return from here we are no longer undefined so we should never
352      // get here again.
353      assert(!entry->second.atom && "Duplicate shared library export");
354      bool weakDef = entry->second.weakDef;
355      auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name,
356                                                            installName,
357                                                            weakDef);
358      entry->second.atom = atom;
359      return atom;
360    }
361
362    // Next, check if symbol is implemented in some re-exported dylib.
363    for (const ReExportedDylib &dylib : _reExportedDylibs) {
364      assert(dylib.file);
365      auto atom = dylib.file->exports(name, installName);
366      if (atom.get())
367        return atom;
368    }
369
370    // Symbol not exported or re-exported by this dylib.
371    return nullptr;
372  }
373
374  struct ReExportedDylib {
375    ReExportedDylib(StringRef p) : path(p), file(nullptr) { }
376    StringRef       path;
377    MachODylibFile *file;
378  };
379
380  struct AtomAndFlags {
381    AtomAndFlags() : atom(nullptr), weakDef(false) { }
382    AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { }
383    const SharedLibraryAtom  *atom;
384    bool                      weakDef;
385  };
386
387  std::unique_ptr<MemoryBuffer>              _mb;
388  MachOLinkingContext                       *_ctx;
389  StringRef                                  _installName;
390  uint32_t                                   _currentVersion;
391  uint32_t                                   _compatVersion;
392  std::vector<ReExportedDylib>               _reExportedDylibs;
393  mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom;
394};
395
396} // end namespace mach_o
397} // end namespace lld
398
399#endif // LLD_READER_WRITER_MACHO_FILE_H
400