1//===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the MachOObjectFile class, which implements the
10// ObjectFile interface for MachO files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_OBJECT_MACHO_H
15#define LLVM_OBJECT_MACHO_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/SmallString.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/BinaryFormat/MachO.h"
24#include "llvm/BinaryFormat/Swift.h"
25#include "llvm/Object/Binary.h"
26#include "llvm/Object/ObjectFile.h"
27#include "llvm/Object/SymbolicFile.h"
28#include "llvm/Support/Error.h"
29#include "llvm/Support/Format.h"
30#include "llvm/Support/MemoryBuffer.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/TargetParser/SubtargetFeature.h"
33#include "llvm/TargetParser/Triple.h"
34#include <cstdint>
35#include <memory>
#include <optional>
36#include <string>
37#include <system_error>
#include <utility>
#include <vector>
38
39namespace llvm {
40namespace object {
41
42/// DiceRef - This is a value type class that represents a single
43/// data in code entry in the table in a Mach-O object file.
///
/// It stores an opaque DataRefImpl handle together with a pointer to the
/// ObjectFile the entry belongs to; a default-constructed DiceRef has a null
/// owner.
44class DiceRef {
45  DataRefImpl DicePimpl;
46  const ObjectFile *OwningObject = nullptr;
47
48public:
49  DiceRef() = default;
50  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
51
52  bool operator==(const DiceRef &Other) const;
53  bool operator<(const DiceRef &Other) const;
54
  /// Presumably advances this reference to the next table entry (the
  /// definition is not in this header).
55  void moveNext();
56
  /// Field accessors: each takes a Result out-parameter by reference and
  /// returns a std::error_code.
57  std::error_code getOffset(uint32_t &Result) const;
58  std::error_code getLength(uint16_t &Result) const;
59  std::error_code getKind(uint16_t &Result) const;
60
61  DataRefImpl getRawDataRefImpl() const;
62  const ObjectFile *getObjectFile() const;
63};
/// Iterator over DiceRef values, built on content_iterator.
64using dice_iterator = content_iterator<DiceRef>;
65
66/// ExportEntry encapsulates the current-state-of-the-walk used when doing a
67/// non-recursive walk of the trie data structure.  This allows you to iterate
68/// across all exported symbols using:
69///      Error Err = Error::success();
70///      for (const llvm::object::ExportEntry &AnExport : Obj->exports(Err)) {
71///      }
72///      if (Err) { report error ...
73class ExportEntry {
74public:
  /// Builds an entry over the raw trie bytes in \p Trie.
  /// NOTE(review): presumably \p Err receives errors found during the walk
  /// and \p O may be null when the trie does not come from a MachOObjectFile
  /// -- confirm against the definition.
75  ExportEntry(Error *Err, const MachOObjectFile *O, ArrayRef<uint8_t> Trie);
76
77  StringRef name() const;
78  uint64_t flags() const;
79  uint64_t address() const;
80  uint64_t other() const;
81  StringRef otherName() const;
82  uint32_t nodeOffset() const;
83
84  bool operator==(const ExportEntry &) const;
85
86  void moveNext();
87
88private:
89  friend class MachOObjectFile;
90
91  void moveToFirst();
92  void moveToEnd();
93  uint64_t readULEB128(const uint8_t *&p, const char **error);
94  void pushDownUntilBottom();
95  void pushNode(uint64_t Offset);
96
97  // Represents a node in the mach-o exports trie.
98  struct NodeState {
99    NodeState(const uint8_t *Ptr);
100
101    const uint8_t *Start;
102    const uint8_t *Current;
103    uint64_t Flags = 0;
104    uint64_t Address = 0;
105    uint64_t Other = 0;
106    const char *ImportName = nullptr;
107    unsigned ChildCount = 0;
108    unsigned NextChildIndex = 0;
109    unsigned ParentStringLength = 0;
110    bool IsExportNode = false;
111  };
112  using NodeList = SmallVector<NodeState, 16>;
113  using node_iterator = NodeList::const_iterator;
114
115  Error *E;
116  const MachOObjectFile *O;
117  ArrayRef<uint8_t> Trie;
118  SmallString<256> CumulativeString;
119  NodeList Stack;
120  bool Done = false;
121
  // Read-only range over every NodeState currently held in Stack.
122  iterator_range<node_iterator> nodes() const {
123    return make_range(Stack.begin(), Stack.end());
124  }
125};
/// Iterator over ExportEntry values, built on content_iterator.
126using export_iterator = content_iterator<ExportEntry>;
127
128// Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry
129// can be checked and translated.  Only the SegIndex/SegOffset pairs from
130// checked entries are to be used with the segmentName(), sectionName() and
131// address() methods below.
132class BindRebaseSegInfo {
133public:
134  BindRebaseSegInfo(const MachOObjectFile *Obj);
135
136  // Used to check a Mach-O Bind or Rebase entry for errors when iterating.
  // Count defaults to 1 and Skip to 0.
  // NOTE(review): the returned const char* is presumably an error message,
  // or null when the entry is valid -- confirm against the definition.
137  const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
138                                 uint8_t PointerSize, uint32_t Count=1,
139                                 uint32_t Skip=0);
140  // Used with valid SegIndex/SegOffset values from checked entries.
  // NOTE(review): segmentName() and sectionName() take SegIndex as int32_t
  // while address() takes it as uint32_t.
141  StringRef segmentName(int32_t SegIndex);
142  StringRef sectionName(int32_t SegIndex, uint64_t SegOffset);
143  uint64_t address(uint32_t SegIndex, uint64_t SegOffset);
144
145private:
  // One record per entry of Sections; none of the fields has a default
  // member initializer.
146  struct SectionInfo {
147    uint64_t Address;
148    uint64_t Size;
149    StringRef SectionName;
150    StringRef SegmentName;
151    uint64_t OffsetInSegment;
152    uint64_t SegmentStartAddress;
153    int32_t SegmentIndex;
154  };
155  const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset);
156
157  SmallVector<SectionInfo, 32> Sections;
158  int32_t MaxSegIndex;
159};
160
161/// MachORebaseEntry encapsulates the current state in the decompression of
162/// rebasing opcodes. This allows you to iterate through the compressed table of
163/// rebasing using:
164///    Error Err = Error::success();
165///    for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(Err)) {
166///    }
167///    if (Err) { report error ...
168class MachORebaseEntry {
169public:
170  MachORebaseEntry(Error *Err, const MachOObjectFile *O,
171                   ArrayRef<uint8_t> opcodes, bool is64Bit);
172
173  int32_t segmentIndex() const;
174  uint64_t segmentOffset() const;
175  StringRef typeName() const;
176  StringRef segmentName() const;
177  StringRef sectionName() const;
178  uint64_t address() const;
179
180  bool operator==(const MachORebaseEntry &) const;
181
182  void moveNext();
183
184private:
185  friend class MachOObjectFile;
186
187  void moveToFirst();
188  void moveToEnd();
189  uint64_t readULEB128(const char **error);
190
  // Decoder state. E, O, Opcodes, Ptr and PointerSize have no default member
  // initializer; NOTE(review): presumably the constructor sets them (and
  // derives PointerSize from is64Bit) -- confirm against the definition.
191  Error *E;
192  const MachOObjectFile *O;
193  ArrayRef<uint8_t> Opcodes;
194  const uint8_t *Ptr;
195  uint64_t SegmentOffset = 0;
  // Starts at -1; presumably meaning "no segment selected yet".
196  int32_t SegmentIndex = -1;
197  uint64_t RemainingLoopCount = 0;
198  uint64_t AdvanceAmount = 0;
199  uint8_t  RebaseType = 0;
200  uint8_t  PointerSize;
201  bool     Done = false;
202};
/// Iterator over MachORebaseEntry values, built on content_iterator.
203using rebase_iterator = content_iterator<MachORebaseEntry>;
204
205/// MachOBindEntry encapsulates the current state in the decompression of
206/// binding opcodes. This allows you to iterate through the compressed table of
207/// bindings using:
208///    Error Err = Error::success();
209///    for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(Err)) {
210///    }
211///    if (Err) { report error ...
212class MachOBindEntry {
213public:
  /// Which bind table the opcodes belong to.
214  enum class Kind { Regular, Lazy, Weak };
215
216  MachOBindEntry(Error *Err, const MachOObjectFile *O,
217                 ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind);
218
219  int32_t segmentIndex() const;
220  uint64_t segmentOffset() const;
221  StringRef typeName() const;
222  StringRef symbolName() const;
223  uint32_t flags() const;
224  int64_t addend() const;
225  int ordinal() const;
226
227  StringRef segmentName() const;
228  StringRef sectionName() const;
229  uint64_t address() const;
230
231  bool operator==(const MachOBindEntry &) const;
232
233  void moveNext();
234
235private:
236  friend class MachOObjectFile;
237
238  void moveToFirst();
239  void moveToEnd();
240  uint64_t readULEB128(const char **error);
241  int64_t readSLEB128(const char **error);
242
  // Decoder state. E, O, Opcodes, Ptr, PointerSize and TableKind have no
  // default member initializer; NOTE(review): presumably the constructor sets
  // them -- confirm against the definition.
243  Error *E;
244  const MachOObjectFile *O;
245  ArrayRef<uint8_t> Opcodes;
246  const uint8_t *Ptr;
247  uint64_t SegmentOffset = 0;
  // Starts at -1; presumably meaning "no segment selected yet".
248  int32_t  SegmentIndex = -1;
249  StringRef SymbolName;
250  bool     LibraryOrdinalSet = false;
251  int      Ordinal = 0;
252  uint32_t Flags = 0;
253  int64_t  Addend = 0;
254  uint64_t RemainingLoopCount = 0;
255  uint64_t AdvanceAmount = 0;
256  uint8_t  BindType = 0;
257  uint8_t  PointerSize;
258  Kind     TableKind;
259  bool     Done = false;
260};
/// Iterator over MachOBindEntry values, built on content_iterator.
261using bind_iterator = content_iterator<MachOBindEntry>;
262
263/// ChainedFixupTarget holds all the information about an external symbol
264/// necessary to bind this binary to that symbol. These values are referenced
265/// indirectly by chained fixup binds. This structure captures values from all
266/// import and symbol formats.
267///
268/// Be aware there are two notions of weak here:
269///   WeakImport == true
270///     The associated bind may be set to 0 if this symbol is missing from its
271///     parent library. This is called a "weak import."
272///   LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP
273///     This symbol may be coalesced with other libraries vending the same
274///     symbol. E.g., C++'s "operator new". This is called a "weak bind."
275struct ChainedFixupTarget {
276public:
277  ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol,
278                     uint64_t Addend, bool WeakImport)
279      : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol),
280        Addend(Addend), WeakImport(WeakImport) {}
281
282  int libOrdinal() { return LibOrdinal; }
283  uint32_t nameOffset() { return NameOffset; }
284  StringRef symbolName() { return SymbolName; }
285  uint64_t addend() { return Addend; }
286  bool weakImport() { return WeakImport; }
287  bool weakBind() {
288    return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
289  }
290
291private:
292  int LibOrdinal;
293  uint32_t NameOffset;
294  StringRef SymbolName;
295  uint64_t Addend;
296  bool WeakImport;
297};
298
299struct ChainedFixupsSegment {
300  ChainedFixupsSegment(uint8_t SegIdx, uint32_t Offset,
301                       const MachO::dyld_chained_starts_in_segment &Header,
302                       std::vector<uint16_t> &&PageStarts)
303      : SegIdx(SegIdx), Offset(Offset), Header(Header),
304        PageStarts(PageStarts){};
305
306  uint32_t SegIdx;
307  uint32_t Offset; // dyld_chained_starts_in_image::seg_info_offset[SegIdx]
308  MachO::dyld_chained_starts_in_segment Header;
309  std::vector<uint16_t> PageStarts; // page_start[] entries, host endianness
310};
311
312/// MachOAbstractFixupEntry is an abstract class representing a fixup in a
313/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also
314/// subdivide into additional subtypes (weak, lazy, reexport).
315///
316/// The two concrete subclasses of MachOAbstractFixupEntry are:
317///
318///   MachORebaseBindEntry   - for dyld opcode-based tables, including threaded-
319///                            rebase, where rebases are mixed in with other
320///                            bind opcodes.
321///   MachOChainedFixupEntry - for pointer chains embedded in data pages.
///
/// NOTE(review): in the visible part of this header only
/// MachOChainedFixupEntry is declared; confirm MachORebaseBindEntry exists.
///
/// None of the member functions are virtual, so the moveNext(), moveToFirst()
/// and moveToEnd() redeclared by MachOChainedFixupEntry hide these versions
/// rather than override them.
322class MachOAbstractFixupEntry {
323public:
324  MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O);
325
326  int32_t segmentIndex() const;
327  uint64_t segmentOffset() const;
328  uint64_t segmentAddress() const;
329  StringRef segmentName() const;
330  StringRef sectionName() const;
331  StringRef typeName() const;
332  StringRef symbolName() const;
333  uint32_t flags() const;
334  int64_t addend() const;
335  int ordinal() const;
336
337  /// \return the location of this fixup as a VM Address. For the VM
338  /// Address this fixup is pointing to, use pointerValue().
339  uint64_t address() const;
340
341  /// \return the VM Address pointed to by this fixup. Use
342  /// pointerValue() to compare against other VM Addresses, such as
343  /// section addresses or segment vmaddrs.
344  uint64_t pointerValue() const { return PointerValue; }
345
346  /// \return the raw "on-disk" representation of the fixup. For
347  /// Threaded rebases and Chained pointers these values are generally
348  /// encoded into various different pointer formats. This value is
349  /// exposed in API for tools that want to display and annotate the
350  /// raw bits.
351  uint64_t rawValue() const { return RawValue; }
352
353  void moveNext();
354
355protected:
  // State shared with subclasses. E and O have no default member
  // initializer; presumably the constructor sets them from Err and O.
356  Error *E;
357  const MachOObjectFile *O;
358  uint64_t SegmentOffset = 0;
  // Starts at -1; presumably meaning "no segment selected yet".
359  int32_t SegmentIndex = -1;
360  StringRef SymbolName;
361  int32_t Ordinal = 0;
362  uint32_t Flags = 0;
363  int64_t Addend = 0;
364  uint64_t PointerValue = 0;
365  uint64_t RawValue = 0;
366  bool Done = false;
367
368  void moveToFirst();
369  void moveToEnd();
370
371  /// \return the vm address of the start of __TEXT segment.
372  uint64_t textAddress() const { return TextAddress; }
373
374private:
  // No default member initializer; presumably computed by the constructor.
375  uint64_t TextAddress;
376};
377
/// Fixup entry for chained fixups, derived from MachOAbstractFixupEntry.
/// It declares its own moveNext(), moveToFirst() and moveToEnd(); the base
/// class versions are non-virtual, so these hide them.
378class MachOChainedFixupEntry : public MachOAbstractFixupEntry {
379public:
  /// Whether the current fixup is a bind or a rebase; see isBind() and
  /// isRebase().
380  enum class FixupKind { Bind, Rebase };
381
  /// NOTE(review): the meaning of \p Parse is not visible in this header;
  /// presumably it controls whether the fixup data is parsed on construction.
382  MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse);
383
384  bool operator==(const MachOChainedFixupEntry &) const;
385
386  bool isBind() const { return Kind == FixupKind::Bind; }
387  bool isRebase() const { return Kind == FixupKind::Rebase; }
388
389  void moveNext();
390  void moveToFirst();
391  void moveToEnd();
392
393private:
394  void findNextPageWithFixups();
395
396  std::vector<ChainedFixupTarget> FixupTargets;
397  std::vector<ChainedFixupsSegment> Segments;
398  ArrayRef<uint8_t> SegmentData;
  // No default member initializer; presumably set before isBind()/isRebase()
  // are meaningful.
399  FixupKind Kind;
400  uint32_t InfoSegIndex = 0; // Index into Segments
401  uint32_t PageIndex = 0;    // Index into Segments[InfoSegIndex].PageStarts
402  uint32_t PageOffset = 0;   // Page offset of the current fixup
403};
/// Iterator over MachOChainedFixupEntry values, built on content_iterator.
404using fixup_iterator = content_iterator<MachOChainedFixupEntry>;
405
406class MachOObjectFile : public ObjectFile {
407public:
408  struct LoadCommandInfo {
    // Pairs a load command's location with a by-value copy of its
    // MachO::load_command.
409    const char *Ptr;      // Where in memory the load command is.
410    MachO::load_command C; // The command itself.
411  };
  // Container and read-only iterator types for LoadCommandInfo records.
412  using LoadCommandList = SmallVector<LoadCommandInfo, 4>;
413  using load_command_iterator = LoadCommandList::const_iterator;
414
415  static Expected<std::unique_ptr<MachOObjectFile>>
416  create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
417         uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0,
418         size_t MachOFilesetEntryOffset = 0);
419
420  static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch);
421
422  void moveSymbolNext(DataRefImpl &Symb) const override;
423
424  uint64_t getNValue(DataRefImpl Sym) const;
425  Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
426
427  // MachO specific.
428  Error checkSymbolTable() const;
429
430  std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const;
431  unsigned getSectionType(SectionRef Sec) const;
432
433  Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
434  uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
435  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
436  Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
437  Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
438  Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
439  unsigned getSymbolSectionID(SymbolRef Symb) const;
440  unsigned getSectionID(SectionRef Sec) const;
441
442  void moveSectionNext(DataRefImpl &Sec) const override;
443  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
444  uint64_t getSectionAddress(DataRefImpl Sec) const override;
445  uint64_t getSectionIndex(DataRefImpl Sec) const override;
446  uint64_t getSectionSize(DataRefImpl Sec) const override;
447  ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const;
448  Expected<ArrayRef<uint8_t>>
449  getSectionContents(DataRefImpl Sec) const override;
450  uint64_t getSectionAlignment(DataRefImpl Sec) const override;
451  Expected<SectionRef> getSection(unsigned SectionIndex) const;
452  Expected<SectionRef> getSection(StringRef SectionName) const;
453  bool isSectionCompressed(DataRefImpl Sec) const override;
454  bool isSectionText(DataRefImpl Sec) const override;
455  bool isSectionData(DataRefImpl Sec) const override;
456  bool isSectionBSS(DataRefImpl Sec) const override;
457  bool isSectionVirtual(DataRefImpl Sec) const override;
458  bool isSectionBitcode(DataRefImpl Sec) const override;
459  bool isDebugSection(DataRefImpl Sec) const override;
460
461  /// Return the raw contents of an entire segment.
462  ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
463  ArrayRef<uint8_t> getSegmentContents(size_t SegmentIndex) const;
464
465  /// When dsymutil generates the companion file, it strips all unnecessary
466  /// sections (e.g. everything in the __TEXT segment) by omitting their body
467  /// and setting the offset in their corresponding load command to zero.
468  ///
469  /// While the load command itself is valid, reading the section corresponds
470  /// to reading the number of bytes specified in the load command, starting
471  /// from offset 0 (i.e. the Mach-O header at the beginning of the file).
472  bool isSectionStripped(DataRefImpl Sec) const override;
473
474  relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
475  relocation_iterator section_rel_end(DataRefImpl Sec) const override;
476
477  relocation_iterator extrel_begin() const;
478  relocation_iterator extrel_end() const;
479  iterator_range<relocation_iterator> external_relocations() const {
    // Pair extrel_begin() and extrel_end() into a single iterable range.
480    return make_range(extrel_begin(), extrel_end());
481  }
482
483  relocation_iterator locrel_begin() const;
484  relocation_iterator locrel_end() const;
485
486  void moveRelocationNext(DataRefImpl &Rel) const override;
487  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
488  symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
489  section_iterator getRelocationSection(DataRefImpl Rel) const;
490  uint64_t getRelocationType(DataRefImpl Rel) const override;
491  void getRelocationTypeName(DataRefImpl Rel,
492                             SmallVectorImpl<char> &Result) const override;
493  uint8_t getRelocationLength(DataRefImpl Rel) const;
494
495  // MachO specific.
496  std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
497  uint32_t getLibraryCount() const;
498
499  section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const;
500
501  // TODO: Would be useful to have an iterator based version
502  // of the load command interface too.
503
504  basic_symbol_iterator symbol_begin() const override;
505  basic_symbol_iterator symbol_end() const override;
506
507  bool is64Bit() const override;
508
509  // MachO specific.
510  symbol_iterator getSymbolByIndex(unsigned Index) const;
511  uint64_t getSymbolIndex(DataRefImpl Symb) const;
512
513  section_iterator section_begin() const override;
514  section_iterator section_end() const override;
515
516  uint8_t getBytesInAddress() const override;
517
518  StringRef getFileFormatName() const override;
519  Triple::ArchType getArch() const override;
520  Expected<SubtargetFeatures> getFeatures() const override {
    // Always yields a default-constructed SubtargetFeatures; nothing is read
    // from the object file here.
521    return SubtargetFeatures();
522  }
523  Triple getArchTriple(const char **McpuDefault = nullptr) const;
524
525  relocation_iterator section_rel_begin(unsigned Index) const;
526  relocation_iterator section_rel_end(unsigned Index) const;
527
528  dice_iterator begin_dices() const;
529  dice_iterator end_dices() const;
530
531  load_command_iterator begin_load_commands() const;
532  load_command_iterator end_load_commands() const;
533  iterator_range<load_command_iterator> load_commands() const;
534
535  /// For use iterating over all exported symbols.
536  iterator_range<export_iterator> exports(Error &Err) const;
537
538  /// For use examining a trie not in a MachOObjectFile.
539  static iterator_range<export_iterator> exports(Error &Err,
540                                                 ArrayRef<uint8_t> Trie,
541                                                 const MachOObjectFile *O =
542                                                                      nullptr);
543
544  /// For use iterating over all rebase table entries.
545  iterator_range<rebase_iterator> rebaseTable(Error &Err);
546
547  /// For use examining rebase opcodes in a MachOObjectFile.
548  static iterator_range<rebase_iterator> rebaseTable(Error &Err,
549                                                     MachOObjectFile *O,
550                                                     ArrayRef<uint8_t> Opcodes,
551                                                     bool is64);
552
553  /// For use iterating over all bind table entries.
554  iterator_range<bind_iterator> bindTable(Error &Err);
555
556  /// For iterating over all chained fixups.
557  iterator_range<fixup_iterator> fixupTable(Error &Err);
558
559  /// For use iterating over all lazy bind table entries.
560  iterator_range<bind_iterator> lazyBindTable(Error &Err);
561
562  /// For use iterating over all weak bind table entries.
563  iterator_range<bind_iterator> weakBindTable(Error &Err);
564
565  /// For use examining bind opcodes in a MachOObjectFile.
566  static iterator_range<bind_iterator> bindTable(Error &Err,
567                                                 MachOObjectFile *O,
568                                                 ArrayRef<uint8_t> Opcodes,
569                                                 bool is64,
570                                                 MachOBindEntry::Kind);
571
572  // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
573  // that fully contains a pointer at that location. Multiple fixups in a bind
574  // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
575  // be tested via the Count and Skip parameters.
576  //
577  // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry.
578  const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
579                                         uint8_t PointerSize, uint32_t Count=1,
580                                          uint32_t Skip=0) const {
581    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
582                                                     PointerSize, Count, Skip);
583  }
584
585  // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
586  // that fully contains a pointer at that location. Multiple fixups in a rebase
587  // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the
588  // Count and Skip parameters.
589  //
590  // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry
591  const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex,
592                                            uint64_t SegOffset,
593                                            uint8_t PointerSize,
594                                            uint32_t Count=1,
595                                            uint32_t Skip=0) const {
596    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
597                                                      PointerSize, Count, Skip);
598  }
599
600  /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to
601  /// get the segment name.
  /// Forwards to BindRebaseSectionTable->segmentName().
602  StringRef BindRebaseSegmentName(int32_t SegIndex) const {
603    return BindRebaseSectionTable->segmentName(SegIndex);
604  }
605
606  /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or
607  /// Rebase entry to get the section name.
  /// Forwards to BindRebaseSectionTable->sectionName().
  /// NOTE(review): SegIndex is uint32_t here, whereas BindRebaseSegmentName()
  /// and BindRebaseSegInfo::sectionName() declare it as int32_t.
608  StringRef BindRebaseSectionName(uint32_t SegIndex, uint64_t SegOffset) const {
609    return BindRebaseSectionTable->sectionName(SegIndex, SegOffset);
610  }
611
612  /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or
613  /// Rebase entry to get the address.
  /// Forwards to BindRebaseSectionTable->address().
614  uint64_t BindRebaseAddress(uint32_t SegIndex, uint64_t SegOffset) const {
615    return BindRebaseSectionTable->address(SegIndex, SegOffset);
616  }
617
618  // In a MachO file, sections have a segment name. This is used in the .o
619  // files. They have a single segment, but this field specifies which segment
620  // a section should be put in the final object.
621  StringRef getSectionFinalSegmentName(DataRefImpl Sec) const;
622
623  // Names are stored as 16 bytes. These return the raw 16 bytes without
624  // interpreting them as a C string.
625  ArrayRef<char> getSectionRawName(DataRefImpl Sec) const;
626  ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const;
627
628  // MachO specific Info about relocations.
629  bool isRelocationScattered(const MachO::any_relocation_info &RE) const;
630  unsigned getPlainRelocationSymbolNum(
631                                    const MachO::any_relocation_info &RE) const;
632  bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const;
633  bool getScatteredRelocationScattered(
634                                    const MachO::any_relocation_info &RE) const;
635  uint32_t getScatteredRelocationValue(
636                                    const MachO::any_relocation_info &RE) const;
637  uint32_t getScatteredRelocationType(
638                                    const MachO::any_relocation_info &RE) const;
639  unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const;
640  unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const;
641  unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const;
642  unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const;
643  SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const;
644
645  // MachO specific structures.
646  MachO::section getSection(DataRefImpl DRI) const;
647  MachO::section_64 getSection64(DataRefImpl DRI) const;
648  MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const;
649  MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const;
650  MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const;
651  MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const;
652
653  MachO::linkedit_data_command
654  getLinkeditDataLoadCommand(const LoadCommandInfo &L) const;
655  MachO::segment_command
656  getSegmentLoadCommand(const LoadCommandInfo &L) const;
657  MachO::segment_command_64
658  getSegment64LoadCommand(const LoadCommandInfo &L) const;
659  MachO::linker_option_command
660  getLinkerOptionLoadCommand(const LoadCommandInfo &L) const;
661  MachO::version_min_command
662  getVersionMinLoadCommand(const LoadCommandInfo &L) const;
663  MachO::note_command
664  getNoteLoadCommand(const LoadCommandInfo &L) const;
665  MachO::build_version_command
666  getBuildVersionLoadCommand(const LoadCommandInfo &L) const;
667  MachO::build_tool_version
668  getBuildToolVersion(unsigned index) const;
669  MachO::dylib_command
670  getDylibIDLoadCommand(const LoadCommandInfo &L) const;
671  MachO::dyld_info_command
672  getDyldInfoLoadCommand(const LoadCommandInfo &L) const;
673  MachO::dylinker_command
674  getDylinkerCommand(const LoadCommandInfo &L) const;
675  MachO::uuid_command
676  getUuidCommand(const LoadCommandInfo &L) const;
677  MachO::rpath_command
678  getRpathCommand(const LoadCommandInfo &L) const;
679  MachO::source_version_command
680  getSourceVersionCommand(const LoadCommandInfo &L) const;
681  MachO::entry_point_command
682  getEntryPointCommand(const LoadCommandInfo &L) const;
683  MachO::encryption_info_command
684  getEncryptionInfoCommand(const LoadCommandInfo &L) const;
685  MachO::encryption_info_command_64
686  getEncryptionInfoCommand64(const LoadCommandInfo &L) const;
687  MachO::sub_framework_command
688  getSubFrameworkCommand(const LoadCommandInfo &L) const;
689  MachO::sub_umbrella_command
690  getSubUmbrellaCommand(const LoadCommandInfo &L) const;
691  MachO::sub_library_command
692  getSubLibraryCommand(const LoadCommandInfo &L) const;
693  MachO::sub_client_command
694  getSubClientCommand(const LoadCommandInfo &L) const;
695  MachO::routines_command
696  getRoutinesCommand(const LoadCommandInfo &L) const;
697  MachO::routines_command_64
698  getRoutinesCommand64(const LoadCommandInfo &L) const;
699  MachO::thread_command
700  getThreadCommand(const LoadCommandInfo &L) const;
701  MachO::fileset_entry_command
702  getFilesetEntryLoadCommand(const LoadCommandInfo &L) const;
703
704  MachO::any_relocation_info getRelocation(DataRefImpl Rel) const;
705  MachO::data_in_code_entry getDice(DataRefImpl Rel) const;
706  const MachO::mach_header &getHeader() const;
707  const MachO::mach_header_64 &getHeader64() const;
708  uint32_t
709  getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC,
710                              unsigned Index) const;
711  MachO::data_in_code_entry getDataInCodeTableEntry(uint32_t DataOffset,
712                                                    unsigned Index) const;
713  MachO::symtab_command getSymtabLoadCommand() const;
714  MachO::dysymtab_command getDysymtabLoadCommand() const;
715  MachO::linkedit_data_command getDataInCodeLoadCommand() const;
716  MachO::linkedit_data_command getLinkOptHintsLoadCommand() const;
717  ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const;
718  ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
719  ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
720  ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
721  ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
722
723  /// If the optional is std::nullopt, no header was found, but the object was
724  /// well-formed.
725  Expected<std::optional<MachO::dyld_chained_fixups_header>>
726  getChainedFixupsHeader() const;
727  Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const;
728
729  // Note: This is a limited, temporary API, which will be removed when Apple
730  // upstreams their implementation. Please do not rely on this.
731  Expected<std::optional<MachO::linkedit_data_command>>
732  getChainedFixupsLoadCommand() const;
733  // Returns the number of sections listed in dyld_chained_starts_in_image, and
734  // a ChainedFixupsSegment for each segment that has fixups.
735  Expected<std::pair<size_t, std::vector<ChainedFixupsSegment>>>
736  getChainedFixupsSegments() const;
737  ArrayRef<uint8_t> getDyldExportsTrie() const;
738
739  SmallVector<uint64_t> getFunctionStarts() const;
740  ArrayRef<uint8_t> getUuid() const;
741
742  StringRef getStringTableData() const;
743
744  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
745
746  static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
747                                         StringRef &Suffix);
748
749  static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType);
750  static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
751                              const char **McpuDefault = nullptr,
752                              const char **ArchFlag = nullptr);
753  static bool isValidArch(StringRef ArchFlag);
754  static ArrayRef<StringRef> getValidArchs();
755  static Triple getHostArch();
756
757  bool isRelocatableObject() const override;
758
759  StringRef mapDebugSectionName(StringRef Name) const override;
760
761  llvm::binaryformat::Swift5ReflectionSectionKind
762  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
763
764  bool hasPageZeroSegment() const { return HasPageZeroSegment; }
765
766  size_t getMachOFilesetEntryOffset() const { return MachOFilesetEntryOffset; }
767
768  static bool classof(const Binary *v) {
    // Type test: delegates to v->isMachO(). v is dereferenced without a null
    // check.
769    return v->isMachO();
770  }
771
772  static uint32_t
773  getVersionMinMajor(MachO::version_min_command &C, bool SDK) {
774    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
775    return (VersionOrSDK >> 16) & 0xffff;
776  }
777
778  static uint32_t
779  getVersionMinMinor(MachO::version_min_command &C, bool SDK) {
780    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
781    return (VersionOrSDK >> 8) & 0xff;
782  }
783
784  static uint32_t
785  getVersionMinUpdate(MachO::version_min_command &C, bool SDK) {
786    uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version;
787    return VersionOrSDK & 0xff;
788  }
789
790  static std::string getBuildPlatform(uint32_t platform) {
791    switch (platform) {
792#define PLATFORM(platform, id, name, build_name, target, tapi_target,          \
793                 marketing)                                                    \
794  case MachO::PLATFORM_##platform:                                             \
795    return #name;
796#include "llvm/BinaryFormat/MachO.def"
797    default:
798      std::string ret;
799      raw_string_ostream ss(ret);
800      ss << format_hex(platform, 8, true);
801      return ss.str();
802    }
803  }
804
805  static std::string getBuildTool(uint32_t tools) {
806    switch (tools) {
807    case MachO::TOOL_CLANG: return "clang";
808    case MachO::TOOL_SWIFT: return "swift";
809    case MachO::TOOL_LD: return "ld";
810    case MachO::TOOL_LLD:
811      return "lld";
812    default:
813      std::string ret;
814      raw_string_ostream ss(ret);
815      ss << format_hex(tools, 8, true);
816      return ss.str();
817    }
818  }
819
820  static std::string getVersionString(uint32_t version) {
821    uint32_t major = (version >> 16) & 0xffff;
822    uint32_t minor = (version >> 8) & 0xff;
823    uint32_t update = version & 0xff;
824
825    SmallString<32> Version;
826    Version = utostr(major) + "." + utostr(minor);
827    if (update != 0)
828      Version += "." + utostr(update);
829    return std::string(std::string(Version));
830  }
831
832  /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
833  /// return the paths to the object files found in the bundle, otherwise return
834  /// an empty vector. If the path appears to be a .dSYM bundle but no objects
835  /// were found or there was a filesystem error, then return an error.
  // The result is an Expected: either the vector of paths or the Error.
  static Expected<std::vector<std::string>>
  findDsymObjectMembers(StringRef Path);
838
839private:
  // Private constructor. The last three parameters default to 0.
  // NOTE(review): \p Err looks like an out-parameter through which
  // construction failures are reported, and objects are presumably created
  // through a public factory -- confirm against the rest of the class.
  MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
                  Error &Err, uint32_t UniversalCputype = 0,
                  uint32_t UniversalIndex = 0,
                  size_t MachOFilesetEntryOffset = 0);

  // Override of the base-class hook that yields the value of symbol \p Symb.
  uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
846
  // Storage for the file header. The 32-bit and 64-bit layouts share one
  // union. NOTE(review): presumably the active member follows the Is64Bits
  // constructor argument -- confirm.
  union {
    MachO::mach_header_64 Header64;
    MachO::mach_header Header;
  };
  // Raw pointers recorded per section / library / build tool.
  // NOTE(review): presumably these point into the underlying memory buffer and
  // are non-owning -- confirm against the constructor.
  using SectionList = SmallVector<const char*, 1>;
  SectionList Sections;
  using LibraryList = SmallVector<const char*, 1>;
  LibraryList Libraries;
  LoadCommandList LoadCommands;
  using LibraryShortName = SmallVector<StringRef, 1>;
  using BuildToolList = SmallVector<const char*, 1>;
  BuildToolList BuildTools;
  // Declared mutable, so const member functions can modify it.
  // NOTE(review): presumably a lazily filled cache of short library names --
  // confirm.
  mutable LibraryShortName LibrariesShortNames;
  std::unique_ptr<BindRebaseSegInfo> BindRebaseSectionTable;
  // One pointer per load command kind named below; nullptr by default.
  // NOTE(review): presumably set when the matching load command is seen while
  // parsing and left null otherwise -- confirm.
  const char *SymtabLoadCmd = nullptr;
  const char *DysymtabLoadCmd = nullptr;
  const char *DataInCodeLoadCmd = nullptr;
  const char *LinkOptHintsLoadCmd = nullptr;
  const char *DyldInfoLoadCmd = nullptr;
  const char *FuncStartsLoadCmd = nullptr;
  const char *DyldChainedFixupsLoadCmd = nullptr;
  const char *DyldExportsTrieLoadCmd = nullptr;
  const char *UuidLoadCmd = nullptr;
  // Value returned by hasPageZeroSegment().
  bool HasPageZeroSegment = false;
  // Value returned by getMachOFilesetEntryOffset().
  size_t MachOFilesetEntryOffset = 0;
872};
873
874/// DiceRef
875inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
876  : DicePimpl(DiceP) , OwningObject(Owner) {}
877
878inline bool DiceRef::operator==(const DiceRef &Other) const {
879  return DicePimpl == Other.DicePimpl;
880}
881
882inline bool DiceRef::operator<(const DiceRef &Other) const {
883  return DicePimpl < Other.DicePimpl;
884}
885
886inline void DiceRef::moveNext() {
887  const MachO::data_in_code_entry *P =
888    reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p);
889  DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1);
890}
891
// Since a Mach-O data-in-code reference (a DiceRef) can only be created when
// the OwningObject ObjectFile is a MachOObjectFile, a static_cast<> is used in
// the methods that get the values of the fields of the reference.
895
896inline std::error_code DiceRef::getOffset(uint32_t &Result) const {
897  const MachOObjectFile *MachOOF =
898    static_cast<const MachOObjectFile *>(OwningObject);
899  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
900  Result = Dice.offset;
901  return std::error_code();
902}
903
904inline std::error_code DiceRef::getLength(uint16_t &Result) const {
905  const MachOObjectFile *MachOOF =
906    static_cast<const MachOObjectFile *>(OwningObject);
907  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
908  Result = Dice.length;
909  return std::error_code();
910}
911
912inline std::error_code DiceRef::getKind(uint16_t &Result) const {
913  const MachOObjectFile *MachOOF =
914    static_cast<const MachOObjectFile *>(OwningObject);
915  MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl);
916  Result = Dice.kind;
917  return std::error_code();
918}
919
920inline DataRefImpl DiceRef::getRawDataRefImpl() const {
921  return DicePimpl;
922}
923
924inline const ObjectFile *DiceRef::getObjectFile() const {
925  return OwningObject;
926}
927
928} // end namespace object
929} // end namespace llvm
930
931#endif // LLVM_OBJECT_MACHO_H
932