1235537Sgber//===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
2235537Sgber//
3235537Sgber// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4235537Sgber// See https://llvm.org/LICENSE.txt for license information.
5235537Sgber// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6235537Sgber//
7235537Sgber//===----------------------------------------------------------------------===//
8235537Sgber//
// This file declares the MachOObjectFile class, which implements the ObjectFile
// interface for MachO files.
11235537Sgber//
12235537Sgber//===----------------------------------------------------------------------===//
13235537Sgber
14235537Sgber#ifndef LLVM_OBJECT_MACHO_H
15235537Sgber#define LLVM_OBJECT_MACHO_H
16235537Sgber
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
39235537Sgbernamespace llvm {
40235537Sgbernamespace object {
41235537Sgber
42235537Sgber/// DiceRef - This is a value type class that represents a single
43235537Sgber/// data in code entry in the table in a Mach-O object file.
44235537Sgberclass DiceRef {
45235537Sgber  DataRefImpl DicePimpl;
46235537Sgber  const ObjectFile *OwningObject = nullptr;
47235537Sgber
48235537Sgberpublic:
49235537Sgber  DiceRef() = default;
50235537Sgber  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
51235537Sgber
52235537Sgber  bool operator==(const DiceRef &Other) const;
53235537Sgber  bool operator<(const DiceRef &Other) const;
54235537Sgber
55235537Sgber  void moveNext();
56235537Sgber
57235537Sgber  std::error_code getOffset(uint32_t &Result) const;
58235537Sgber  std::error_code getLength(uint16_t &Result) const;
59235537Sgber  std::error_code getKind(uint16_t &Result) const;
60235537Sgber
61235537Sgber  DataRefImpl getRawDataRefImpl() const;
62235537Sgber  const ObjectFile *getObjectFile() const;
63235537Sgber};
64235537Sgberusing dice_iterator = content_iterator<DiceRef>;
65235537Sgber
66235537Sgber/// ExportEntry encapsulates the current-state-of-the-walk used when doing a
67235537Sgber/// non-recursive walk of the trie data structure.  This allows you to iterate
68235537Sgber/// across all exported symbols using:
69235537Sgber///      Error Err = Error::success();
70235537Sgber///      for (const llvm::object::ExportEntry &AnExport : Obj->exports(&Err)) {
71235537Sgber///      }
72235537Sgber///      if (Err) { report error ...
73235537Sgberclass ExportEntry {
74235537Sgberpublic:
75235537Sgber  ExportEntry(Error *Err, const MachOObjectFile *O, ArrayRef<uint8_t> Trie);
76235537Sgber
77235537Sgber  StringRef name() const;
78235537Sgber  uint64_t flags() const;
79235537Sgber  uint64_t address() const;
80235537Sgber  uint64_t other() const;
81235537Sgber  StringRef otherName() const;
82235537Sgber  uint32_t nodeOffset() const;
83235537Sgber
84235537Sgber  bool operator==(const ExportEntry &) const;
85235537Sgber
86235537Sgber  void moveNext();
87235537Sgber
88235537Sgberprivate:
89235537Sgber  friend class MachOObjectFile;
90235537Sgber
91235537Sgber  void moveToFirst();
92235537Sgber  void moveToEnd();
93235537Sgber  uint64_t readULEB128(const uint8_t *&p, const char **error);
94235537Sgber  void pushDownUntilBottom();
95235537Sgber  void pushNode(uint64_t Offset);
96235537Sgber
97235537Sgber  // Represents a node in the mach-o exports trie.
98235537Sgber  struct NodeState {
99235537Sgber    NodeState(const uint8_t *Ptr);
100235537Sgber
101235537Sgber    const uint8_t *Start;
102235537Sgber    const uint8_t *Current;
103235537Sgber    uint64_t Flags = 0;
104235537Sgber    uint64_t Address = 0;
105235537Sgber    uint64_t Other = 0;
106235537Sgber    const char *ImportName = nullptr;
107235537Sgber    unsigned ChildCount = 0;
108235537Sgber    unsigned NextChildIndex = 0;
109235537Sgber    unsigned ParentStringLength = 0;
110235537Sgber    bool IsExportNode = false;
111235537Sgber  };
112235537Sgber  using NodeList = SmallVector<NodeState, 16>;
113235537Sgber  using node_iterator = NodeList::const_iterator;
114235537Sgber
115235537Sgber  Error *E;
116235537Sgber  const MachOObjectFile *O;
117235537Sgber  ArrayRef<uint8_t> Trie;
118235537Sgber  SmallString<256> CumulativeString;
119235537Sgber  NodeList Stack;
120235537Sgber  bool Done = false;
121235537Sgber
122235537Sgber  iterator_range<node_iterator> nodes() const {
123235537Sgber    return make_range(Stack.begin(), Stack.end());
124235537Sgber  }
125235537Sgber};
126235537Sgberusing export_iterator = content_iterator<ExportEntry>;
127235537Sgber
128235537Sgber// Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry
129235537Sgber// can be checked and translated.  Only the SegIndex/SegOffset pairs from
130235537Sgber// checked entries are to be used with the segmentName(), sectionName() and
131235537Sgber// address() methods below.
132235537Sgberclass BindRebaseSegInfo {
133235537Sgberpublic:
134235537Sgber  BindRebaseSegInfo(const MachOObjectFile *Obj);
135235537Sgber
136235537Sgber  // Used to check a Mach-O Bind or Rebase entry for errors when iterating.
137235537Sgber  const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
138235537Sgber                                 uint8_t PointerSize, uint32_t Count=1,
139235537Sgber                                 uint32_t Skip=0);
140235537Sgber  // Used with valid SegIndex/SegOffset values from checked entries.
141235537Sgber  StringRef segmentName(int32_t SegIndex);
142235537Sgber  StringRef sectionName(int32_t SegIndex, uint64_t SegOffset);
143235537Sgber  uint64_t address(uint32_t SegIndex, uint64_t SegOffset);
144235537Sgber
145235537Sgberprivate:
146235537Sgber  struct SectionInfo {
147235537Sgber    uint64_t Address;
148235537Sgber    uint64_t Size;
149235537Sgber    StringRef SectionName;
150235537Sgber    StringRef SegmentName;
151235537Sgber    uint64_t OffsetInSegment;
152235537Sgber    uint64_t SegmentStartAddress;
153235537Sgber    int32_t SegmentIndex;
154235537Sgber  };
155235537Sgber  const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset);
156235537Sgber
157235537Sgber  SmallVector<SectionInfo, 32> Sections;
158235537Sgber  int32_t MaxSegIndex;
159235537Sgber};
160235537Sgber
161235537Sgber/// MachORebaseEntry encapsulates the current state in the decompression of
162235537Sgber/// rebasing opcodes. This allows you to iterate through the compressed table of
163235537Sgber/// rebasing using:
164235537Sgber///    Error Err = Error::success();
165235537Sgber///    for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(&Err)) {
166235537Sgber///    }
167235537Sgber///    if (Err) { report error ...
168235537Sgberclass MachORebaseEntry {
169235537Sgberpublic:
170235537Sgber  MachORebaseEntry(Error *Err, const MachOObjectFile *O,
171235537Sgber                   ArrayRef<uint8_t> opcodes, bool is64Bit);
172235537Sgber
173235537Sgber  int32_t segmentIndex() const;
174235537Sgber  uint64_t segmentOffset() const;
175235537Sgber  StringRef typeName() const;
176235537Sgber  StringRef segmentName() const;
177235537Sgber  StringRef sectionName() const;
178235537Sgber  uint64_t address() const;
179235537Sgber
180235537Sgber  bool operator==(const MachORebaseEntry &) const;
181235537Sgber
182235537Sgber  void moveNext();
183235537Sgber
184235537Sgberprivate:
185235537Sgber  friend class MachOObjectFile;
186235537Sgber
187235537Sgber  void moveToFirst();
188235537Sgber  void moveToEnd();
189235537Sgber  uint64_t readULEB128(const char **error);
190235537Sgber
191235537Sgber  Error *E;
192235537Sgber  const MachOObjectFile *O;
193235537Sgber  ArrayRef<uint8_t> Opcodes;
194235537Sgber  const uint8_t *Ptr;
195235537Sgber  uint64_t SegmentOffset = 0;
196235537Sgber  int32_t SegmentIndex = -1;
197235537Sgber  uint64_t RemainingLoopCount = 0;
198235537Sgber  uint64_t AdvanceAmount = 0;
199235537Sgber  uint8_t  RebaseType = 0;
200235537Sgber  uint8_t  PointerSize;
201235537Sgber  bool     Done = false;
202235537Sgber};
203235537Sgberusing rebase_iterator = content_iterator<MachORebaseEntry>;
204235537Sgber
205235537Sgber/// MachOBindEntry encapsulates the current state in the decompression of
206235537Sgber/// binding opcodes. This allows you to iterate through the compressed table of
207235537Sgber/// bindings using:
208235537Sgber///    Error Err = Error::success();
209235537Sgber///    for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(&Err)) {
210235537Sgber///    }
211235537Sgber///    if (Err) { report error ...
212235537Sgberclass MachOBindEntry {
213235537Sgberpublic:
214235537Sgber  enum class Kind { Regular, Lazy, Weak };
215235537Sgber
216235537Sgber  MachOBindEntry(Error *Err, const MachOObjectFile *O,
217235537Sgber                 ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind);
218235537Sgber
219235537Sgber  int32_t segmentIndex() const;
220235537Sgber  uint64_t segmentOffset() const;
221235537Sgber  StringRef typeName() const;
222235537Sgber  StringRef symbolName() const;
223235537Sgber  uint32_t flags() const;
224235537Sgber  int64_t addend() const;
225235537Sgber  int ordinal() const;
226235537Sgber
227235537Sgber  StringRef segmentName() const;
228235537Sgber  StringRef sectionName() const;
229235537Sgber  uint64_t address() const;
230235537Sgber
231235537Sgber  bool operator==(const MachOBindEntry &) const;
232235537Sgber
233235537Sgber  void moveNext();
234235537Sgber
235235537Sgberprivate:
236235537Sgber  friend class MachOObjectFile;
237235537Sgber
238235537Sgber  void moveToFirst();
239235537Sgber  void moveToEnd();
240235537Sgber  uint64_t readULEB128(const char **error);
241235537Sgber  int64_t readSLEB128(const char **error);
242235537Sgber
243235537Sgber  Error *E;
244235537Sgber  const MachOObjectFile *O;
245235537Sgber  ArrayRef<uint8_t> Opcodes;
246235537Sgber  const uint8_t *Ptr;
247235537Sgber  uint64_t SegmentOffset = 0;
248235537Sgber  int32_t  SegmentIndex = -1;
249235537Sgber  StringRef SymbolName;
250235537Sgber  bool     LibraryOrdinalSet = false;
251235537Sgber  int      Ordinal = 0;
252235537Sgber  uint32_t Flags = 0;
253235537Sgber  int64_t  Addend = 0;
254235537Sgber  uint64_t RemainingLoopCount = 0;
255235537Sgber  uint64_t AdvanceAmount = 0;
256235537Sgber  uint8_t  BindType = 0;
257235537Sgber  uint8_t  PointerSize;
258235537Sgber  Kind     TableKind;
259235537Sgber  bool     Done = false;
260235537Sgber};
261235537Sgberusing bind_iterator = content_iterator<MachOBindEntry>;
262235537Sgber
263235537Sgber/// ChainedFixupTarget holds all the information about an external symbol
264235537Sgber/// necessary to bind this binary to that symbol. These values are referenced
265235537Sgber/// indirectly by chained fixup binds. This structure captures values from all
266235537Sgber/// import and symbol formats.
267235537Sgber///
268235537Sgber/// Be aware there are two notions of weak here:
269235537Sgber///   WeakImport == true
270235537Sgber///     The associated bind may be set to 0 if this symbol is missing from its
271235537Sgber///     parent library. This is called a "weak import."
272235537Sgber///   LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP
273235537Sgber///     This symbol may be coalesced with other libraries vending the same
274235537Sgber///     symbol. E.g., C++'s "operator new". This is called a "weak bind."
275235537Sgberstruct ChainedFixupTarget {
276235537Sgberpublic:
277235537Sgber  ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol,
278235537Sgber                     uint64_t Addend, bool WeakImport)
279235537Sgber      : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol),
280235537Sgber        Addend(Addend), WeakImport(WeakImport) {}
281235537Sgber
282235537Sgber  int libOrdinal() { return LibOrdinal; }
283235537Sgber  uint32_t nameOffset() { return NameOffset; }
284235537Sgber  StringRef symbolName() { return SymbolName; }
285235537Sgber  uint64_t addend() { return Addend; }
286235537Sgber  bool weakImport() { return WeakImport; }
287235537Sgber  bool weakBind() {
288235537Sgber    return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
289235537Sgber  }
290235537Sgber
291235537Sgberprivate:
292235537Sgber  int LibOrdinal;
293235537Sgber  uint32_t NameOffset;
294235537Sgber  StringRef SymbolName;
295235537Sgber  uint64_t Addend;
296235537Sgber  bool WeakImport;
297235537Sgber};
298235537Sgber
299235537Sgberstruct ChainedFixupsSegment {
300235537Sgber  ChainedFixupsSegment(uint8_t SegIdx, uint32_t Offset,
301235537Sgber                       const MachO::dyld_chained_starts_in_segment &Header,
302235537Sgber                       std::vector<uint16_t> &&PageStarts)
303235537Sgber      : SegIdx(SegIdx), Offset(Offset), Header(Header),
304235537Sgber        PageStarts(PageStarts){};
305235537Sgber
306235537Sgber  uint32_t SegIdx;
307235537Sgber  uint32_t Offset; // dyld_chained_starts_in_image::seg_info_offset[SegIdx]
308235537Sgber  MachO::dyld_chained_starts_in_segment Header;
309235537Sgber  std::vector<uint16_t> PageStarts; // page_start[] entries, host endianness
310235537Sgber};
311
312/// MachOAbstractFixupEntry is an abstract class representing a fixup in a
313/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also
314/// subdivide into additional subtypes (weak, lazy, reexport).
315///
316/// The two concrete subclasses of MachOAbstractFixupEntry are:
317///
318///   MachORebaseBindEntry   - for dyld opcode-based tables, including threaded-
319///                            rebase, where rebases are mixed in with other
320///                            bind opcodes.
321///   MachOChainedFixupEntry - for pointer chains embedded in data pages.
322class MachOAbstractFixupEntry {
323public:
324  MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O);
325
326  int32_t segmentIndex() const;
327  uint64_t segmentOffset() const;
328  uint64_t segmentAddress() const;
329  StringRef segmentName() const;
330  StringRef sectionName() const;
331  StringRef typeName() const;
332  StringRef symbolName() const;
333  uint32_t flags() const;
334  int64_t addend() const;
335  int ordinal() const;
336
337  /// \return the location of this fixup as a VM Address. For the VM
338  /// Address this fixup is pointing to, use pointerValue().
339  uint64_t address() const;
340
341  /// \return the VM Address pointed to by this fixup. Use
342  /// pointerValue() to compare against other VM Addresses, such as
343  /// section addresses or segment vmaddrs.
344  uint64_t pointerValue() const { return PointerValue; }
345
346  /// \return the raw "on-disk" representation of the fixup. For
347  /// Threaded rebases and Chained pointers these values are generally
348  /// encoded into various different pointer formats. This value is
349  /// exposed in API for tools that want to display and annotate the
350  /// raw bits.
351  uint64_t rawValue() const { return RawValue; }
352
353  void moveNext();
354
355protected:
356  Error *E;
357  const MachOObjectFile *O;
358  uint64_t SegmentOffset = 0;
359  int32_t SegmentIndex = -1;
360  StringRef SymbolName;
361  int32_t Ordinal = 0;
362  uint32_t Flags = 0;
363  int64_t Addend = 0;
364  uint64_t PointerValue = 0;
365  uint64_t RawValue = 0;
366  bool Done = false;
367
368  void moveToFirst();
369  void moveToEnd();
370
371  /// \return the vm address of the start of __TEXT segment.
372  uint64_t textAddress() const { return TextAddress; }
373
374private:
375  uint64_t TextAddress;
376};
377
378class MachOChainedFixupEntry : public MachOAbstractFixupEntry {
379public:
380  enum class FixupKind { Bind, Rebase };
381
382  MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse);
383
384  bool operator==(const MachOChainedFixupEntry &) const;
385
386  bool isBind() const { return Kind == FixupKind::Bind; }
387  bool isRebase() const { return Kind == FixupKind::Rebase; }
388
389  void moveNext();
390  void moveToFirst();
391  void moveToEnd();
392
393private:
394  void findNextPageWithFixups();
395
396  std::vector<ChainedFixupTarget> FixupTargets;
397  std::vector<ChainedFixupsSegment> Segments;
398  ArrayRef<uint8_t> SegmentData;
399  FixupKind Kind;
400  uint32_t InfoSegIndex = 0; // Index into Segments
401  uint32_t PageIndex = 0;    // Index into Segments[InfoSegIdx].PageStarts
402  uint32_t PageOffset = 0;   // Page offset of the current fixup
403};
404using fixup_iterator = content_iterator<MachOChainedFixupEntry>;
405
406class MachOObjectFile : public ObjectFile {
407public:
408  struct LoadCommandInfo {
409    const char *Ptr;      // Where in memory the load command is.
410    MachO::load_command C; // The command itself.
411  };
412  using LoadCommandList = SmallVector<LoadCommandInfo, 4>;
413  using load_command_iterator = LoadCommandList::const_iterator;
414
415  static Expected<std::unique_ptr<MachOObjectFile>>
416  create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
417         uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0,
418         size_t MachOFilesetEntryOffset = 0);
419
420  static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch);
421
422  void moveSymbolNext(DataRefImpl &Symb) const override;
423
424  uint64_t getNValue(DataRefImpl Sym) const;
425  Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
426
427  // MachO specific.
428  Error checkSymbolTable() const;
429
430  std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const;
431  unsigned getSectionType(SectionRef Sec) const;
432
433  Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
434  uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
435  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
436  Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
437  Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
438  Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
439  unsigned getSymbolSectionID(SymbolRef Symb) const;
440  unsigned getSectionID(SectionRef Sec) const;
441
442  void moveSectionNext(DataRefImpl &Sec) const override;
443  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
444  uint64_t getSectionAddress(DataRefImpl Sec) const override;
445  uint64_t getSectionIndex(DataRefImpl Sec) const override;
446  uint64_t getSectionSize(DataRefImpl Sec) const override;
447  ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const;
448  Expected<ArrayRef<uint8_t>>
449  getSectionContents(DataRefImpl Sec) const override;
450  uint64_t getSectionAlignment(DataRefImpl Sec) const override;
451  Expected<SectionRef> getSection(unsigned SectionIndex) const;
452  Expected<SectionRef> getSection(StringRef SectionName) const;
453  bool isSectionCompressed(DataRefImpl Sec) const override;
454  bool isSectionText(DataRefImpl Sec) const override;
455  bool isSectionData(DataRefImpl Sec) const override;
456  bool isSectionBSS(DataRefImpl Sec) const override;
457  bool isSectionVirtual(DataRefImpl Sec) const override;
458  bool isSectionBitcode(DataRefImpl Sec) const override;
459  bool isDebugSection(DataRefImpl Sec) const override;
460
461  /// Return the raw contents of an entire segment.
462  ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
463  ArrayRef<uint8_t> getSegmentContents(size_t SegmentIndex) const;
464
  /// When dsymutil generates the companion file, it strips all unnecessary
  /// sections (e.g. everything in the __TEXT segment) by omitting their body
  /// and setting the offset in their corresponding load command to zero.
468  ///
469  /// While the load command itself is valid, reading the section corresponds
470  /// to reading the number of bytes specified in the load command, starting
471  /// from offset 0 (i.e. the Mach-O header at the beginning of the file).
472  bool isSectionStripped(DataRefImpl Sec) const override;
473
474  relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
475  relocation_iterator section_rel_end(DataRefImpl Sec) const override;
476
477  relocation_iterator extrel_begin() const;
478  relocation_iterator extrel_end() const;
479  iterator_range<relocation_iterator> external_relocations() const {
480    return make_range(extrel_begin(), extrel_end());
481  }
482
483  relocation_iterator locrel_begin() const;
484  relocation_iterator locrel_end() const;
485
486  void moveRelocationNext(DataRefImpl &Rel) const override;
487  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
488  symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
489  section_iterator getRelocationSection(DataRefImpl Rel) const;
490  uint64_t getRelocationType(DataRefImpl Rel) const override;
491  void getRelocationTypeName(DataRefImpl Rel,
492                             SmallVectorImpl<char> &Result) const override;
493  uint8_t getRelocationLength(DataRefImpl Rel) const;
494
495  // MachO specific.
496  std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
497  uint32_t getLibraryCount() const;
498
499  section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const;
500
501  // TODO: Would be useful to have an iterator based version
502  // of the load command interface too.
503
504  basic_symbol_iterator symbol_begin() const override;
505  basic_symbol_iterator symbol_end() const override;
506
507  bool is64Bit() const override;
508
509  // MachO specific.
510  symbol_iterator getSymbolByIndex(unsigned Index) const;
511  uint64_t getSymbolIndex(DataRefImpl Symb) const;
512
513  section_iterator section_begin() const override;
514  section_iterator section_end() const override;
515
516  uint8_t getBytesInAddress() const override;
517
518  StringRef getFileFormatName() const override;
519  Triple::ArchType getArch() const override;
520  Expected<SubtargetFeatures> getFeatures() const override {
521    return SubtargetFeatures();
522  }
523  Triple getArchTriple(const char **McpuDefault = nullptr) const;
524
525  relocation_iterator section_rel_begin(unsigned Index) const;
526  relocation_iterator section_rel_end(unsigned Index) const;
527
528  dice_iterator begin_dices() const;
529  dice_iterator end_dices() const;
530
531  load_command_iterator begin_load_commands() const;
532  load_command_iterator end_load_commands() const;
533  iterator_range<load_command_iterator> load_commands() const;
534
535  /// For use iterating over all exported symbols.
536  iterator_range<export_iterator> exports(Error &Err) const;
537
538  /// For use examining a trie not in a MachOObjectFile.
539  static iterator_range<export_iterator> exports(Error &Err,
540                                                 ArrayRef<uint8_t> Trie,
541                                                 const MachOObjectFile *O =
542                                                                      nullptr);
543
544  /// For use iterating over all rebase table entries.
545  iterator_range<rebase_iterator> rebaseTable(Error &Err);
546
547  /// For use examining rebase opcodes in a MachOObjectFile.
548  static iterator_range<rebase_iterator> rebaseTable(Error &Err,
549                                                     MachOObjectFile *O,
550                                                     ArrayRef<uint8_t> Opcodes,
551                                                     bool is64);
552
553  /// For use iterating over all bind table entries.
554  iterator_range<bind_iterator> bindTable(Error &Err);
555
556  /// For iterating over all chained fixups.
557  iterator_range<fixup_iterator> fixupTable(Error &Err);
558
559  /// For use iterating over all lazy bind table entries.
560  iterator_range<bind_iterator> lazyBindTable(Error &Err);
561
562  /// For use iterating over all weak bind table entries.
563  iterator_range<bind_iterator> weakBindTable(Error &Err);
564
565  /// For use examining bind opcodes in a MachOObjectFile.
566  static iterator_range<bind_iterator> bindTable(Error &Err,
567                                                 MachOObjectFile *O,
568                                                 ArrayRef<uint8_t> Opcodes,
569                                                 bool is64,
570                                                 MachOBindEntry::Kind);
571
572  // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
573  // that fully contains a pointer at that location. Multiple fixups in a bind
574  // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
575  // be tested via the Count and Skip parameters.
576  //
577  // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry.
578  const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
579                                         uint8_t PointerSize, uint32_t Count=1,
580                                          uint32_t Skip=0) const {
581    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
582                                                     PointerSize, Count, Skip);
583  }
584
585  // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
586  // that fully contains a pointer at that location. Multiple fixups in a rebase
587  // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the
588  // Count and Skip parameters.
589  //
590  // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry
591  const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex,
592                                            uint64_t SegOffset,
593                                            uint8_t PointerSize,
594                                            uint32_t Count=1,
595                                            uint32_t Skip=0) const {
596    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
597                                                      PointerSize, Count, Skip);
598  }
599
600  /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to
601  /// get the segment name.
602  StringRef BindRebaseSegmentName(int32_t SegIndex) const {
603    return BindRebaseSectionTable->segmentName(SegIndex);
604  }
605
606  /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or
607  /// Rebase entry to get the section name.
608  StringRef BindRebaseSectionName(uint32_t SegIndex, uint64_t SegOffset) const {
609    return BindRebaseSectionTable->sectionName(SegIndex, SegOffset);
610  }
611
612  /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or
613  /// Rebase entry to get the address.
614  uint64_t BindRebaseAddress(uint32_t SegIndex, uint64_t SegOffset) const {
615    return BindRebaseSectionTable->address(SegIndex, SegOffset);
616  }
617
618  // In a MachO file, sections have a segment name. This is used in the .o
619  // files. They have a single segment, but this field specifies which segment
620  // a section should be put in the final object.
621  StringRef getSectionFinalSegmentName(DataRefImpl Sec) const;
622
  // Names are stored as 16 bytes. These return the raw 16 bytes without
  // interpreting them as a C string.
625  ArrayRef<char> getSectionRawName(DataRefImpl Sec) const;
626  ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const;
627
628  // MachO specific Info about relocations.
629  bool isRelocationScattered(const MachO::any_relocation_info &RE) const;
630  unsigned getPlainRelocationSymbolNum(
631                                    const MachO::any_relocation_info &RE) const;
632  bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const;
633  bool getScatteredRelocationScattered(
634                                    const MachO::any_relocation_info &RE) const;
635  uint32_t getScatteredRelocationValue(
636                                    const MachO::any_relocation_info &RE) const;
637  uint32_t getScatteredRelocationType(
638                                    const MachO::any_relocation_info &RE) const;
639  unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const;
640  unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const;
641  unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const;
642  unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const;
643  SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const;
644
645  // MachO specific structures.
646  MachO::section getSection(DataRefImpl DRI) const;
647  MachO::section_64 getSection64(DataRefImpl DRI) const;
648  MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const;
649  MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const;
650  MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const;
651  MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const;
652
653  MachO::linkedit_data_command
654  getLinkeditDataLoadCommand(const LoadCommandInfo &L) const;
655  MachO::segment_command
656  getSegmentLoadCommand(const LoadCommandInfo &L) const;
657  MachO::segment_command_64
658  getSegment64LoadCommand(const LoadCommandInfo &L) const;
659  MachO::linker_option_command
660  getLinkerOptionLoadCommand(const LoadCommandInfo &L) const;
661  MachO::version_min_command
662  getVersionMinLoadCommand(const LoadCommandInfo &L) const;
663  MachO::note_command
664  getNoteLoadCommand(const LoadCommandInfo &L) const;
665  MachO::build_version_command
666  getBuildVersionLoadCommand(const LoadCommandInfo &L) const;
667  MachO::build_tool_version
668  getBuildToolVersion(unsigned index) const;
669  MachO::dylib_command
670  getDylibIDLoadCommand(const LoadCommandInfo &L) const;
671  MachO::dyld_info_command
672  getDyldInfoLoadCommand(const LoadCommandInfo &L) const;
673  MachO::dylinker_command
674  getDylinkerCommand(const LoadCommandInfo &L) const;
675  MachO::uuid_command
676  getUuidCommand(const LoadCommandInfo &L) const;
677  MachO::rpath_command
678  getRpathCommand(const LoadCommandInfo &L) const;
679  MachO::source_version_command
680  getSourceVersionCommand(const LoadCommandInfo &L) const;
681  MachO::entry_point_command
682  getEntryPointCommand(const LoadCommandInfo &L) const;
683  MachO::encryption_info_command
684  getEncryptionInfoCommand(const LoadCommandInfo &L) const;
685  MachO::encryption_info_command_64
686  getEncryptionInfoCommand64(const LoadCommandInfo &L) const;
687  MachO::sub_framework_command
688  getSubFrameworkCommand(const LoadCommandInfo &L) const;
689  MachO::sub_umbrella_command
690  getSubUmbrellaCommand(const LoadCommandInfo &L) const;
691  MachO::sub_library_command
692  getSubLibraryCommand(const LoadCommandInfo &L) const;
693  MachO::sub_client_command
694  getSubClientCommand(const LoadCommandInfo &L) const;
695  MachO::routines_command
696  getRoutinesCommand(const LoadCommandInfo &L) const;
697  MachO::routines_command_64
698  getRoutinesCommand64(const LoadCommandInfo &L) const;
699  MachO::thread_command
700  getThreadCommand(const LoadCommandInfo &L) const;
701  MachO::fileset_entry_command
702  getFilesetEntryLoadCommand(const LoadCommandInfo &L) const;
703
704  MachO::any_relocation_info getRelocation(DataRefImpl Rel) const;
705  MachO::data_in_code_entry getDice(DataRefImpl Rel) const;
706  const MachO::mach_header &getHeader() const;
707  const MachO::mach_header_64 &getHeader64() const;
708  uint32_t
709  getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC,
710                              unsigned Index) const;
711  MachO::data_in_code_entry getDataInCodeTableEntry(uint32_t DataOffset,
712                                                    unsigned Index) const;
713  MachO::symtab_command getSymtabLoadCommand() const;
714  MachO::dysymtab_command getDysymtabLoadCommand() const;
715  MachO::linkedit_data_command getDataInCodeLoadCommand() const;
716  MachO::linkedit_data_command getLinkOptHintsLoadCommand() const;
717  ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const;
718  ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
719  ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
720  ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
721  ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
722
723  /// If the optional is std::nullopt, no header was found, but the object was
724  /// well-formed.
725  Expected<std::optional<MachO::dyld_chained_fixups_header>>
726  getChainedFixupsHeader() const;
727  Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const;
728
729  // Note: This is a limited, temporary API, which will be removed when Apple
730  // upstreams their implementation. Please do not rely on this.
731  Expected<std::optional<MachO::linkedit_data_command>>
732  getChainedFixupsLoadCommand() const;
733  // Returns the number of sections listed in dyld_chained_starts_in_image, and
734  // a ChainedFixupsSegment for each segment that has fixups.
735  Expected<std::pair<size_t, std::vector<ChainedFixupsSegment>>>
736  getChainedFixupsSegments() const;
737  ArrayRef<uint8_t> getDyldExportsTrie() const;
738
739  SmallVector<uint64_t> getFunctionStarts() const;
740  ArrayRef<uint8_t> getUuid() const;
741
742  StringRef getStringTableData() const;
743
744  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
745
746  static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
747                                         StringRef &Suffix);
748
749  static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType);
750  static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
751                              const char **McpuDefault = nullptr,
752                              const char **ArchFlag = nullptr);
753  static bool isValidArch(StringRef ArchFlag);
754  static ArrayRef<StringRef> getValidArchs();
755  static Triple getHostArch();
756
757  bool isRelocatableObject() const override;
758
759  StringRef mapDebugSectionName(StringRef Name) const override;
760
761  llvm::binaryformat::Swift5ReflectionSectionKind
762  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
763
764  bool hasPageZeroSegment() const { return HasPageZeroSegment; }
765
766  size_t getMachOFilesetEntryOffset() const { return MachOFilesetEntryOffset; }
767
768  static bool classof(const Binary *v) {
769    return v->isMachO();
770  }
771
772  static uint32_t
773  getVersionMinMajor(MachO::version_min_command &C, bool SDK) {
774    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
775    return (VersionOrSDK >> 16) & 0xffff;
776  }
777
778  static uint32_t
779  getVersionMinMinor(MachO::version_min_command &C, bool SDK) {
780    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
781    return (VersionOrSDK >> 8) & 0xff;
782  }
783
784  static uint32_t
785  getVersionMinUpdate(MachO::version_min_command &C, bool SDK) {
786    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
787    return VersionOrSDK & 0xff;
788  }
789
790  static std::string getBuildPlatform(uint32_t platform) {
791    switch (platform) {
792#define PLATFORM(platform, id, name, build_name, target, tapi_target,          \
793                 marketing)                                                    \
794  case MachO::PLATFORM_##platform:                                             \
795    return #name;
796#include "llvm/BinaryFormat/MachO.def"
797    default:
798      std::string ret;
799      raw_string_ostream ss(ret);
800      ss << format_hex(platform, 8, true);
801      return ss.str();
802    }
803  }
804
805  static std::string getBuildTool(uint32_t tools) {
806    switch (tools) {
807    case MachO::TOOL_CLANG: return "clang";
808    case MachO::TOOL_SWIFT: return "swift";
809    case MachO::TOOL_LD: return "ld";
810    case MachO::TOOL_LLD:
811      return "lld";
812    default:
813      std::string ret;
814      raw_string_ostream ss(ret);
815      ss << format_hex(tools, 8, true);
816      return ss.str();
817    }
818  }
819
820  static std::string getVersionString(uint32_t version) {
821    uint32_t major = (version >> 16) & 0xffff;
822    uint32_t minor = (version >> 8) & 0xff;
823    uint32_t update = version & 0xff;
824
825    SmallString<32> Version;
826    Version = utostr(major) + "." + utostr(minor);
827    if (update != 0)
828      Version += "." + utostr(update);
829    return std::string(std::string(Version));
830  }
831
832  /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
833  /// return the paths to the object files found in the bundle, otherwise return
834  /// an empty vector. If the path appears to be a .dSYM bundle but no objects
835  /// were found or there was a filesystem error, then return an error.
836  static Expected<std::vector<std::string>>
837  findDsymObjectMembers(StringRef Path);
838
839private:
840  MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
841                  Error &Err, uint32_t UniversalCputype = 0,
842                  uint32_t UniversalIndex = 0,
843                  size_t MachOFilesetEntryOffset = 0);
844
845  uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
846
847  union {
848    MachO::mach_header_64 Header64;
849    MachO::mach_header Header;
850  };
851  using SectionList = SmallVector<const char*, 1>;
852  SectionList Sections;
853  using LibraryList = SmallVector<const char*, 1>;
854  LibraryList Libraries;
855  LoadCommandList LoadCommands;
856  using LibraryShortName = SmallVector<StringRef, 1>;
857  using BuildToolList = SmallVector<const char*, 1>;
858  BuildToolList BuildTools;
859  mutable LibraryShortName LibrariesShortNames;
860  std::unique_ptr<BindRebaseSegInfo> BindRebaseSectionTable;
861  const char *SymtabLoadCmd = nullptr;
862  const char *DysymtabLoadCmd = nullptr;
863  const char *DataInCodeLoadCmd = nullptr;
864  const char *LinkOptHintsLoadCmd = nullptr;
865  const char *DyldInfoLoadCmd = nullptr;
866  const char *FuncStartsLoadCmd = nullptr;
867  const char *DyldChainedFixupsLoadCmd = nullptr;
868  const char *DyldExportsTrieLoadCmd = nullptr;
869  const char *UuidLoadCmd = nullptr;
870  bool HasPageZeroSegment = false;
871  size_t MachOFilesetEntryOffset = 0;
872};
873
874/// DiceRef
875inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
876  : DicePimpl(DiceP) , OwningObject(Owner) {}
877
878inline bool DiceRef::operator==(const DiceRef &Other) const {
879  return DicePimpl == Other.DicePimpl;
880}
881
882inline bool DiceRef::operator<(const DiceRef &Other) const {
883  return DicePimpl < Other.DicePimpl;
884}
885
886inline void DiceRef::moveNext() {
887  const MachO::data_in_code_entry *P =
888    reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p);
889  DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1);
890}
891
// A DiceRef (a Mach-O data-in-code reference) can only be created when its
// OwningObject is a MachOObjectFile, so the methods that read the fields of
// the reference downcast OwningObject with static_cast<>.
895
896inline std::error_code DiceRef::getOffset(uint32_t &Result) const {
897  const MachOObjectFile *MachOOF =
898    static_cast<const MachOObjectFile *>(OwningObject);
899  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
900  Result = Dice.offset;
901  return std::error_code();
902}
903
904inline std::error_code DiceRef::getLength(uint16_t &Result) const {
905  const MachOObjectFile *MachOOF =
906    static_cast<const MachOObjectFile *>(OwningObject);
907  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
908  Result = Dice.length;
909  return std::error_code();
910}
911
912inline std::error_code DiceRef::getKind(uint16_t &Result) const {
913  const MachOObjectFile *MachOOF =
914    static_cast<const MachOObjectFile *>(OwningObject);
915  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
916  Result = Dice.kind;
917  return std::error_code();
918}
919
920inline DataRefImpl DiceRef::getRawDataRefImpl() const {
921  return DicePimpl;
922}
923
924inline const ObjectFile *DiceRef::getObjectFile() const {
925  return OwningObject;
926}
927
928} // end namespace object
929} // end namespace llvm
930
931#endif // LLVM_OBJECT_MACHO_H
932