//===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the MachOObjectFile class, which implement the ObjectFile
// interface for MachO files.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_MACHO_H
#define LLVM_OBJECT_MACHO_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

namespace llvm {
namespace object {

/// DiceRef - This is a value type class that represents a single
/// data in code entry in the
table in a Mach-O object file. 44235537Sgberclass DiceRef { 45235537Sgber DataRefImpl DicePimpl; 46235537Sgber const ObjectFile *OwningObject = nullptr; 47235537Sgber 48235537Sgberpublic: 49235537Sgber DiceRef() = default; 50235537Sgber DiceRef(DataRefImpl DiceP, const ObjectFile *Owner); 51235537Sgber 52235537Sgber bool operator==(const DiceRef &Other) const; 53235537Sgber bool operator<(const DiceRef &Other) const; 54235537Sgber 55235537Sgber void moveNext(); 56235537Sgber 57235537Sgber std::error_code getOffset(uint32_t &Result) const; 58235537Sgber std::error_code getLength(uint16_t &Result) const; 59235537Sgber std::error_code getKind(uint16_t &Result) const; 60235537Sgber 61235537Sgber DataRefImpl getRawDataRefImpl() const; 62235537Sgber const ObjectFile *getObjectFile() const; 63235537Sgber}; 64235537Sgberusing dice_iterator = content_iterator<DiceRef>; 65235537Sgber 66235537Sgber/// ExportEntry encapsulates the current-state-of-the-walk used when doing a 67235537Sgber/// non-recursive walk of the trie data structure. This allows you to iterate 68235537Sgber/// across all exported symbols using: 69235537Sgber/// Error Err = Error::success(); 70235537Sgber/// for (const llvm::object::ExportEntry &AnExport : Obj->exports(&Err)) { 71235537Sgber/// } 72235537Sgber/// if (Err) { report error ... 
73235537Sgberclass ExportEntry { 74235537Sgberpublic: 75235537Sgber ExportEntry(Error *Err, const MachOObjectFile *O, ArrayRef<uint8_t> Trie); 76235537Sgber 77235537Sgber StringRef name() const; 78235537Sgber uint64_t flags() const; 79235537Sgber uint64_t address() const; 80235537Sgber uint64_t other() const; 81235537Sgber StringRef otherName() const; 82235537Sgber uint32_t nodeOffset() const; 83235537Sgber 84235537Sgber bool operator==(const ExportEntry &) const; 85235537Sgber 86235537Sgber void moveNext(); 87235537Sgber 88235537Sgberprivate: 89235537Sgber friend class MachOObjectFile; 90235537Sgber 91235537Sgber void moveToFirst(); 92235537Sgber void moveToEnd(); 93235537Sgber uint64_t readULEB128(const uint8_t *&p, const char **error); 94235537Sgber void pushDownUntilBottom(); 95235537Sgber void pushNode(uint64_t Offset); 96235537Sgber 97235537Sgber // Represents a node in the mach-o exports trie. 98235537Sgber struct NodeState { 99235537Sgber NodeState(const uint8_t *Ptr); 100235537Sgber 101235537Sgber const uint8_t *Start; 102235537Sgber const uint8_t *Current; 103235537Sgber uint64_t Flags = 0; 104235537Sgber uint64_t Address = 0; 105235537Sgber uint64_t Other = 0; 106235537Sgber const char *ImportName = nullptr; 107235537Sgber unsigned ChildCount = 0; 108235537Sgber unsigned NextChildIndex = 0; 109235537Sgber unsigned ParentStringLength = 0; 110235537Sgber bool IsExportNode = false; 111235537Sgber }; 112235537Sgber using NodeList = SmallVector<NodeState, 16>; 113235537Sgber using node_iterator = NodeList::const_iterator; 114235537Sgber 115235537Sgber Error *E; 116235537Sgber const MachOObjectFile *O; 117235537Sgber ArrayRef<uint8_t> Trie; 118235537Sgber SmallString<256> CumulativeString; 119235537Sgber NodeList Stack; 120235537Sgber bool Done = false; 121235537Sgber 122235537Sgber iterator_range<node_iterator> nodes() const { 123235537Sgber return make_range(Stack.begin(), Stack.end()); 124235537Sgber } 125235537Sgber}; 126235537Sgberusing export_iterator = 
content_iterator<ExportEntry>; 127235537Sgber 128235537Sgber// Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry 129235537Sgber// can be checked and translated. Only the SegIndex/SegOffset pairs from 130235537Sgber// checked entries are to be used with the segmentName(), sectionName() and 131235537Sgber// address() methods below. 132235537Sgberclass BindRebaseSegInfo { 133235537Sgberpublic: 134235537Sgber BindRebaseSegInfo(const MachOObjectFile *Obj); 135235537Sgber 136235537Sgber // Used to check a Mach-O Bind or Rebase entry for errors when iterating. 137235537Sgber const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, 138235537Sgber uint8_t PointerSize, uint32_t Count=1, 139235537Sgber uint32_t Skip=0); 140235537Sgber // Used with valid SegIndex/SegOffset values from checked entries. 141235537Sgber StringRef segmentName(int32_t SegIndex); 142235537Sgber StringRef sectionName(int32_t SegIndex, uint64_t SegOffset); 143235537Sgber uint64_t address(uint32_t SegIndex, uint64_t SegOffset); 144235537Sgber 145235537Sgberprivate: 146235537Sgber struct SectionInfo { 147235537Sgber uint64_t Address; 148235537Sgber uint64_t Size; 149235537Sgber StringRef SectionName; 150235537Sgber StringRef SegmentName; 151235537Sgber uint64_t OffsetInSegment; 152235537Sgber uint64_t SegmentStartAddress; 153235537Sgber int32_t SegmentIndex; 154235537Sgber }; 155235537Sgber const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset); 156235537Sgber 157235537Sgber SmallVector<SectionInfo, 32> Sections; 158235537Sgber int32_t MaxSegIndex; 159235537Sgber}; 160235537Sgber 161235537Sgber/// MachORebaseEntry encapsulates the current state in the decompression of 162235537Sgber/// rebasing opcodes. 
This allows you to iterate through the compressed table of 163235537Sgber/// rebasing using: 164235537Sgber/// Error Err = Error::success(); 165235537Sgber/// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(&Err)) { 166235537Sgber/// } 167235537Sgber/// if (Err) { report error ... 168235537Sgberclass MachORebaseEntry { 169235537Sgberpublic: 170235537Sgber MachORebaseEntry(Error *Err, const MachOObjectFile *O, 171235537Sgber ArrayRef<uint8_t> opcodes, bool is64Bit); 172235537Sgber 173235537Sgber int32_t segmentIndex() const; 174235537Sgber uint64_t segmentOffset() const; 175235537Sgber StringRef typeName() const; 176235537Sgber StringRef segmentName() const; 177235537Sgber StringRef sectionName() const; 178235537Sgber uint64_t address() const; 179235537Sgber 180235537Sgber bool operator==(const MachORebaseEntry &) const; 181235537Sgber 182235537Sgber void moveNext(); 183235537Sgber 184235537Sgberprivate: 185235537Sgber friend class MachOObjectFile; 186235537Sgber 187235537Sgber void moveToFirst(); 188235537Sgber void moveToEnd(); 189235537Sgber uint64_t readULEB128(const char **error); 190235537Sgber 191235537Sgber Error *E; 192235537Sgber const MachOObjectFile *O; 193235537Sgber ArrayRef<uint8_t> Opcodes; 194235537Sgber const uint8_t *Ptr; 195235537Sgber uint64_t SegmentOffset = 0; 196235537Sgber int32_t SegmentIndex = -1; 197235537Sgber uint64_t RemainingLoopCount = 0; 198235537Sgber uint64_t AdvanceAmount = 0; 199235537Sgber uint8_t RebaseType = 0; 200235537Sgber uint8_t PointerSize; 201235537Sgber bool Done = false; 202235537Sgber}; 203235537Sgberusing rebase_iterator = content_iterator<MachORebaseEntry>; 204235537Sgber 205235537Sgber/// MachOBindEntry encapsulates the current state in the decompression of 206235537Sgber/// binding opcodes. 
This allows you to iterate through the compressed table of 207235537Sgber/// bindings using: 208235537Sgber/// Error Err = Error::success(); 209235537Sgber/// for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(&Err)) { 210235537Sgber/// } 211235537Sgber/// if (Err) { report error ... 212235537Sgberclass MachOBindEntry { 213235537Sgberpublic: 214235537Sgber enum class Kind { Regular, Lazy, Weak }; 215235537Sgber 216235537Sgber MachOBindEntry(Error *Err, const MachOObjectFile *O, 217235537Sgber ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind); 218235537Sgber 219235537Sgber int32_t segmentIndex() const; 220235537Sgber uint64_t segmentOffset() const; 221235537Sgber StringRef typeName() const; 222235537Sgber StringRef symbolName() const; 223235537Sgber uint32_t flags() const; 224235537Sgber int64_t addend() const; 225235537Sgber int ordinal() const; 226235537Sgber 227235537Sgber StringRef segmentName() const; 228235537Sgber StringRef sectionName() const; 229235537Sgber uint64_t address() const; 230235537Sgber 231235537Sgber bool operator==(const MachOBindEntry &) const; 232235537Sgber 233235537Sgber void moveNext(); 234235537Sgber 235235537Sgberprivate: 236235537Sgber friend class MachOObjectFile; 237235537Sgber 238235537Sgber void moveToFirst(); 239235537Sgber void moveToEnd(); 240235537Sgber uint64_t readULEB128(const char **error); 241235537Sgber int64_t readSLEB128(const char **error); 242235537Sgber 243235537Sgber Error *E; 244235537Sgber const MachOObjectFile *O; 245235537Sgber ArrayRef<uint8_t> Opcodes; 246235537Sgber const uint8_t *Ptr; 247235537Sgber uint64_t SegmentOffset = 0; 248235537Sgber int32_t SegmentIndex = -1; 249235537Sgber StringRef SymbolName; 250235537Sgber bool LibraryOrdinalSet = false; 251235537Sgber int Ordinal = 0; 252235537Sgber uint32_t Flags = 0; 253235537Sgber int64_t Addend = 0; 254235537Sgber uint64_t RemainingLoopCount = 0; 255235537Sgber uint64_t AdvanceAmount = 0; 256235537Sgber uint8_t BindType = 0; 
257235537Sgber uint8_t PointerSize; 258235537Sgber Kind TableKind; 259235537Sgber bool Done = false; 260235537Sgber}; 261235537Sgberusing bind_iterator = content_iterator<MachOBindEntry>; 262235537Sgber 263235537Sgber/// ChainedFixupTarget holds all the information about an external symbol 264235537Sgber/// necessary to bind this binary to that symbol. These values are referenced 265235537Sgber/// indirectly by chained fixup binds. This structure captures values from all 266235537Sgber/// import and symbol formats. 267235537Sgber/// 268235537Sgber/// Be aware there are two notions of weak here: 269235537Sgber/// WeakImport == true 270235537Sgber/// The associated bind may be set to 0 if this symbol is missing from its 271235537Sgber/// parent library. This is called a "weak import." 272235537Sgber/// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP 273235537Sgber/// This symbol may be coalesced with other libraries vending the same 274235537Sgber/// symbol. E.g., C++'s "operator new". This is called a "weak bind." 
275235537Sgberstruct ChainedFixupTarget { 276235537Sgberpublic: 277235537Sgber ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol, 278235537Sgber uint64_t Addend, bool WeakImport) 279235537Sgber : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol), 280235537Sgber Addend(Addend), WeakImport(WeakImport) {} 281235537Sgber 282235537Sgber int libOrdinal() { return LibOrdinal; } 283235537Sgber uint32_t nameOffset() { return NameOffset; } 284235537Sgber StringRef symbolName() { return SymbolName; } 285235537Sgber uint64_t addend() { return Addend; } 286235537Sgber bool weakImport() { return WeakImport; } 287235537Sgber bool weakBind() { 288235537Sgber return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP; 289235537Sgber } 290235537Sgber 291235537Sgberprivate: 292235537Sgber int LibOrdinal; 293235537Sgber uint32_t NameOffset; 294235537Sgber StringRef SymbolName; 295235537Sgber uint64_t Addend; 296235537Sgber bool WeakImport; 297235537Sgber}; 298235537Sgber 299235537Sgberstruct ChainedFixupsSegment { 300235537Sgber ChainedFixupsSegment(uint8_t SegIdx, uint32_t Offset, 301235537Sgber const MachO::dyld_chained_starts_in_segment &Header, 302235537Sgber std::vector<uint16_t> &&PageStarts) 303235537Sgber : SegIdx(SegIdx), Offset(Offset), Header(Header), 304235537Sgber PageStarts(PageStarts){}; 305235537Sgber 306235537Sgber uint32_t SegIdx; 307235537Sgber uint32_t Offset; // dyld_chained_starts_in_image::seg_info_offset[SegIdx] 308235537Sgber MachO::dyld_chained_starts_in_segment Header; 309235537Sgber std::vector<uint16_t> PageStarts; // page_start[] entries, host endianness 310235537Sgber}; 311 312/// MachOAbstractFixupEntry is an abstract class representing a fixup in a 313/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also 314/// subdivide into additional subtypes (weak, lazy, reexport). 
315/// 316/// The two concrete subclasses of MachOAbstractFixupEntry are: 317/// 318/// MachORebaseBindEntry - for dyld opcode-based tables, including threaded- 319/// rebase, where rebases are mixed in with other 320/// bind opcodes. 321/// MachOChainedFixupEntry - for pointer chains embedded in data pages. 322class MachOAbstractFixupEntry { 323public: 324 MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O); 325 326 int32_t segmentIndex() const; 327 uint64_t segmentOffset() const; 328 uint64_t segmentAddress() const; 329 StringRef segmentName() const; 330 StringRef sectionName() const; 331 StringRef typeName() const; 332 StringRef symbolName() const; 333 uint32_t flags() const; 334 int64_t addend() const; 335 int ordinal() const; 336 337 /// \return the location of this fixup as a VM Address. For the VM 338 /// Address this fixup is pointing to, use pointerValue(). 339 uint64_t address() const; 340 341 /// \return the VM Address pointed to by this fixup. Use 342 /// pointerValue() to compare against other VM Addresses, such as 343 /// section addresses or segment vmaddrs. 344 uint64_t pointerValue() const { return PointerValue; } 345 346 /// \return the raw "on-disk" representation of the fixup. For 347 /// Threaded rebases and Chained pointers these values are generally 348 /// encoded into various different pointer formats. This value is 349 /// exposed in API for tools that want to display and annotate the 350 /// raw bits. 351 uint64_t rawValue() const { return RawValue; } 352 353 void moveNext(); 354 355protected: 356 Error *E; 357 const MachOObjectFile *O; 358 uint64_t SegmentOffset = 0; 359 int32_t SegmentIndex = -1; 360 StringRef SymbolName; 361 int32_t Ordinal = 0; 362 uint32_t Flags = 0; 363 int64_t Addend = 0; 364 uint64_t PointerValue = 0; 365 uint64_t RawValue = 0; 366 bool Done = false; 367 368 void moveToFirst(); 369 void moveToEnd(); 370 371 /// \return the vm address of the start of __TEXT segment. 
372 uint64_t textAddress() const { return TextAddress; } 373 374private: 375 uint64_t TextAddress; 376}; 377 378class MachOChainedFixupEntry : public MachOAbstractFixupEntry { 379public: 380 enum class FixupKind { Bind, Rebase }; 381 382 MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse); 383 384 bool operator==(const MachOChainedFixupEntry &) const; 385 386 bool isBind() const { return Kind == FixupKind::Bind; } 387 bool isRebase() const { return Kind == FixupKind::Rebase; } 388 389 void moveNext(); 390 void moveToFirst(); 391 void moveToEnd(); 392 393private: 394 void findNextPageWithFixups(); 395 396 std::vector<ChainedFixupTarget> FixupTargets; 397 std::vector<ChainedFixupsSegment> Segments; 398 ArrayRef<uint8_t> SegmentData; 399 FixupKind Kind; 400 uint32_t InfoSegIndex = 0; // Index into Segments 401 uint32_t PageIndex = 0; // Index into Segments[InfoSegIdx].PageStarts 402 uint32_t PageOffset = 0; // Page offset of the current fixup 403}; 404using fixup_iterator = content_iterator<MachOChainedFixupEntry>; 405 406class MachOObjectFile : public ObjectFile { 407public: 408 struct LoadCommandInfo { 409 const char *Ptr; // Where in memory the load command is. 410 MachO::load_command C; // The command itself. 411 }; 412 using LoadCommandList = SmallVector<LoadCommandInfo, 4>; 413 using load_command_iterator = LoadCommandList::const_iterator; 414 415 static Expected<std::unique_ptr<MachOObjectFile>> 416 create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, 417 uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0, 418 size_t MachOFilesetEntryOffset = 0); 419 420 static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch); 421 422 void moveSymbolNext(DataRefImpl &Symb) const override; 423 424 uint64_t getNValue(DataRefImpl Sym) const; 425 Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; 426 427 // MachO specific. 
428 Error checkSymbolTable() const; 429 430 std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const; 431 unsigned getSectionType(SectionRef Sec) const; 432 433 Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override; 434 uint32_t getSymbolAlignment(DataRefImpl Symb) const override; 435 uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; 436 Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override; 437 Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override; 438 Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override; 439 unsigned getSymbolSectionID(SymbolRef Symb) const; 440 unsigned getSectionID(SectionRef Sec) const; 441 442 void moveSectionNext(DataRefImpl &Sec) const override; 443 Expected<StringRef> getSectionName(DataRefImpl Sec) const override; 444 uint64_t getSectionAddress(DataRefImpl Sec) const override; 445 uint64_t getSectionIndex(DataRefImpl Sec) const override; 446 uint64_t getSectionSize(DataRefImpl Sec) const override; 447 ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const; 448 Expected<ArrayRef<uint8_t>> 449 getSectionContents(DataRefImpl Sec) const override; 450 uint64_t getSectionAlignment(DataRefImpl Sec) const override; 451 Expected<SectionRef> getSection(unsigned SectionIndex) const; 452 Expected<SectionRef> getSection(StringRef SectionName) const; 453 bool isSectionCompressed(DataRefImpl Sec) const override; 454 bool isSectionText(DataRefImpl Sec) const override; 455 bool isSectionData(DataRefImpl Sec) const override; 456 bool isSectionBSS(DataRefImpl Sec) const override; 457 bool isSectionVirtual(DataRefImpl Sec) const override; 458 bool isSectionBitcode(DataRefImpl Sec) const override; 459 bool isDebugSection(DataRefImpl Sec) const override; 460 461 /// Return the raw contents of an entire segment. 
462 ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const; 463 ArrayRef<uint8_t> getSegmentContents(size_t SegmentIndex) const; 464 465 /// When dsymutil generates the companion file, it strips all unnecessary 466 /// sections (e.g. everything in the _TEXT segment) by omitting their body 467 /// and setting the offset in their corresponding load command to zero. 468 /// 469 /// While the load command itself is valid, reading the section corresponds 470 /// to reading the number of bytes specified in the load command, starting 471 /// from offset 0 (i.e. the Mach-O header at the beginning of the file). 472 bool isSectionStripped(DataRefImpl Sec) const override; 473 474 relocation_iterator section_rel_begin(DataRefImpl Sec) const override; 475 relocation_iterator section_rel_end(DataRefImpl Sec) const override; 476 477 relocation_iterator extrel_begin() const; 478 relocation_iterator extrel_end() const; 479 iterator_range<relocation_iterator> external_relocations() const { 480 return make_range(extrel_begin(), extrel_end()); 481 } 482 483 relocation_iterator locrel_begin() const; 484 relocation_iterator locrel_end() const; 485 486 void moveRelocationNext(DataRefImpl &Rel) const override; 487 uint64_t getRelocationOffset(DataRefImpl Rel) const override; 488 symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; 489 section_iterator getRelocationSection(DataRefImpl Rel) const; 490 uint64_t getRelocationType(DataRefImpl Rel) const override; 491 void getRelocationTypeName(DataRefImpl Rel, 492 SmallVectorImpl<char> &Result) const override; 493 uint8_t getRelocationLength(DataRefImpl Rel) const; 494 495 // MachO specific. 496 std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const; 497 uint32_t getLibraryCount() const; 498 499 section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const; 500 501 // TODO: Would be useful to have an iterator based version 502 // of the load command interface too. 
503 504 basic_symbol_iterator symbol_begin() const override; 505 basic_symbol_iterator symbol_end() const override; 506 507 bool is64Bit() const override; 508 509 // MachO specific. 510 symbol_iterator getSymbolByIndex(unsigned Index) const; 511 uint64_t getSymbolIndex(DataRefImpl Symb) const; 512 513 section_iterator section_begin() const override; 514 section_iterator section_end() const override; 515 516 uint8_t getBytesInAddress() const override; 517 518 StringRef getFileFormatName() const override; 519 Triple::ArchType getArch() const override; 520 Expected<SubtargetFeatures> getFeatures() const override { 521 return SubtargetFeatures(); 522 } 523 Triple getArchTriple(const char **McpuDefault = nullptr) const; 524 525 relocation_iterator section_rel_begin(unsigned Index) const; 526 relocation_iterator section_rel_end(unsigned Index) const; 527 528 dice_iterator begin_dices() const; 529 dice_iterator end_dices() const; 530 531 load_command_iterator begin_load_commands() const; 532 load_command_iterator end_load_commands() const; 533 iterator_range<load_command_iterator> load_commands() const; 534 535 /// For use iterating over all exported symbols. 536 iterator_range<export_iterator> exports(Error &Err) const; 537 538 /// For use examining a trie not in a MachOObjectFile. 539 static iterator_range<export_iterator> exports(Error &Err, 540 ArrayRef<uint8_t> Trie, 541 const MachOObjectFile *O = 542 nullptr); 543 544 /// For use iterating over all rebase table entries. 545 iterator_range<rebase_iterator> rebaseTable(Error &Err); 546 547 /// For use examining rebase opcodes in a MachOObjectFile. 548 static iterator_range<rebase_iterator> rebaseTable(Error &Err, 549 MachOObjectFile *O, 550 ArrayRef<uint8_t> Opcodes, 551 bool is64); 552 553 /// For use iterating over all bind table entries. 554 iterator_range<bind_iterator> bindTable(Error &Err); 555 556 /// For iterating over all chained fixups. 
557 iterator_range<fixup_iterator> fixupTable(Error &Err); 558 559 /// For use iterating over all lazy bind table entries. 560 iterator_range<bind_iterator> lazyBindTable(Error &Err); 561 562 /// For use iterating over all weak bind table entries. 563 iterator_range<bind_iterator> weakBindTable(Error &Err); 564 565 /// For use examining bind opcodes in a MachOObjectFile. 566 static iterator_range<bind_iterator> bindTable(Error &Err, 567 MachOObjectFile *O, 568 ArrayRef<uint8_t> Opcodes, 569 bool is64, 570 MachOBindEntry::Kind); 571 572 // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists 573 // that fully contains a pointer at that location. Multiple fixups in a bind 574 // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can 575 // be tested via the Count and Skip parameters. 576 // 577 // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry. 578 const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, 579 uint8_t PointerSize, uint32_t Count=1, 580 uint32_t Skip=0) const { 581 return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, 582 PointerSize, Count, Skip); 583 } 584 585 // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists 586 // that fully contains a pointer at that location. Multiple fixups in a rebase 587 // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the 588 // Count and Skip parameters. 589 // 590 // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry 591 const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex, 592 uint64_t SegOffset, 593 uint8_t PointerSize, 594 uint32_t Count=1, 595 uint32_t Skip=0) const { 596 return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, 597 PointerSize, Count, Skip); 598 } 599 600 /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to 601 /// get the segment name. 
602 StringRef BindRebaseSegmentName(int32_t SegIndex) const { 603 return BindRebaseSectionTable->segmentName(SegIndex); 604 } 605 606 /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or 607 /// Rebase entry to get the section name. 608 StringRef BindRebaseSectionName(uint32_t SegIndex, uint64_t SegOffset) const { 609 return BindRebaseSectionTable->sectionName(SegIndex, SegOffset); 610 } 611 612 /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or 613 /// Rebase entry to get the address. 614 uint64_t BindRebaseAddress(uint32_t SegIndex, uint64_t SegOffset) const { 615 return BindRebaseSectionTable->address(SegIndex, SegOffset); 616 } 617 618 // In a MachO file, sections have a segment name. This is used in the .o 619 // files. They have a single segment, but this field specifies which segment 620 // a section should be put in the final object. 621 StringRef getSectionFinalSegmentName(DataRefImpl Sec) const; 622 623 // Names are stored as 16 bytes. These returns the raw 16 bytes without 624 // interpreting them as a C string. 625 ArrayRef<char> getSectionRawName(DataRefImpl Sec) const; 626 ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const; 627 628 // MachO specific Info about relocations. 
629 bool isRelocationScattered(const MachO::any_relocation_info &RE) const; 630 unsigned getPlainRelocationSymbolNum( 631 const MachO::any_relocation_info &RE) const; 632 bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const; 633 bool getScatteredRelocationScattered( 634 const MachO::any_relocation_info &RE) const; 635 uint32_t getScatteredRelocationValue( 636 const MachO::any_relocation_info &RE) const; 637 uint32_t getScatteredRelocationType( 638 const MachO::any_relocation_info &RE) const; 639 unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const; 640 unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const; 641 unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const; 642 unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const; 643 SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const; 644 645 // MachO specific structures. 646 MachO::section getSection(DataRefImpl DRI) const; 647 MachO::section_64 getSection64(DataRefImpl DRI) const; 648 MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const; 649 MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const; 650 MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const; 651 MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const; 652 653 MachO::linkedit_data_command 654 getLinkeditDataLoadCommand(const LoadCommandInfo &L) const; 655 MachO::segment_command 656 getSegmentLoadCommand(const LoadCommandInfo &L) const; 657 MachO::segment_command_64 658 getSegment64LoadCommand(const LoadCommandInfo &L) const; 659 MachO::linker_option_command 660 getLinkerOptionLoadCommand(const LoadCommandInfo &L) const; 661 MachO::version_min_command 662 getVersionMinLoadCommand(const LoadCommandInfo &L) const; 663 MachO::note_command 664 getNoteLoadCommand(const LoadCommandInfo &L) const; 665 MachO::build_version_command 666 getBuildVersionLoadCommand(const LoadCommandInfo 
&L) const; 667 MachO::build_tool_version 668 getBuildToolVersion(unsigned index) const; 669 MachO::dylib_command 670 getDylibIDLoadCommand(const LoadCommandInfo &L) const; 671 MachO::dyld_info_command 672 getDyldInfoLoadCommand(const LoadCommandInfo &L) const; 673 MachO::dylinker_command 674 getDylinkerCommand(const LoadCommandInfo &L) const; 675 MachO::uuid_command 676 getUuidCommand(const LoadCommandInfo &L) const; 677 MachO::rpath_command 678 getRpathCommand(const LoadCommandInfo &L) const; 679 MachO::source_version_command 680 getSourceVersionCommand(const LoadCommandInfo &L) const; 681 MachO::entry_point_command 682 getEntryPointCommand(const LoadCommandInfo &L) const; 683 MachO::encryption_info_command 684 getEncryptionInfoCommand(const LoadCommandInfo &L) const; 685 MachO::encryption_info_command_64 686 getEncryptionInfoCommand64(const LoadCommandInfo &L) const; 687 MachO::sub_framework_command 688 getSubFrameworkCommand(const LoadCommandInfo &L) const; 689 MachO::sub_umbrella_command 690 getSubUmbrellaCommand(const LoadCommandInfo &L) const; 691 MachO::sub_library_command 692 getSubLibraryCommand(const LoadCommandInfo &L) const; 693 MachO::sub_client_command 694 getSubClientCommand(const LoadCommandInfo &L) const; 695 MachO::routines_command 696 getRoutinesCommand(const LoadCommandInfo &L) const; 697 MachO::routines_command_64 698 getRoutinesCommand64(const LoadCommandInfo &L) const; 699 MachO::thread_command 700 getThreadCommand(const LoadCommandInfo &L) const; 701 MachO::fileset_entry_command 702 getFilesetEntryLoadCommand(const LoadCommandInfo &L) const; 703 704 MachO::any_relocation_info getRelocation(DataRefImpl Rel) const; 705 MachO::data_in_code_entry getDice(DataRefImpl Rel) const; 706 const MachO::mach_header &getHeader() const; 707 const MachO::mach_header_64 &getHeader64() const; 708 uint32_t 709 getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC, 710 unsigned Index) const; 711 MachO::data_in_code_entry 
getDataInCodeTableEntry(uint32_t DataOffset, 712 unsigned Index) const; 713 MachO::symtab_command getSymtabLoadCommand() const; 714 MachO::dysymtab_command getDysymtabLoadCommand() const; 715 MachO::linkedit_data_command getDataInCodeLoadCommand() const; 716 MachO::linkedit_data_command getLinkOptHintsLoadCommand() const; 717 ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const; 718 ArrayRef<uint8_t> getDyldInfoBindOpcodes() const; 719 ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const; 720 ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const; 721 ArrayRef<uint8_t> getDyldInfoExportsTrie() const; 722 723 /// If the optional is std::nullopt, no header was found, but the object was 724 /// well-formed. 725 Expected<std::optional<MachO::dyld_chained_fixups_header>> 726 getChainedFixupsHeader() const; 727 Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const; 728 729 // Note: This is a limited, temporary API, which will be removed when Apple 730 // upstreams their implementation. Please do not rely on this. 731 Expected<std::optional<MachO::linkedit_data_command>> 732 getChainedFixupsLoadCommand() const; 733 // Returns the number of sections listed in dyld_chained_starts_in_image, and 734 // a ChainedFixupsSegment for each segment that has fixups. 
735 Expected<std::pair<size_t, std::vector<ChainedFixupsSegment>>> 736 getChainedFixupsSegments() const; 737 ArrayRef<uint8_t> getDyldExportsTrie() const; 738 739 SmallVector<uint64_t> getFunctionStarts() const; 740 ArrayRef<uint8_t> getUuid() const; 741 742 StringRef getStringTableData() const; 743 744 void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const; 745 746 static StringRef guessLibraryShortName(StringRef Name, bool &isFramework, 747 StringRef &Suffix); 748 749 static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType); 750 static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType, 751 const char **McpuDefault = nullptr, 752 const char **ArchFlag = nullptr); 753 static bool isValidArch(StringRef ArchFlag); 754 static ArrayRef<StringRef> getValidArchs(); 755 static Triple getHostArch(); 756 757 bool isRelocatableObject() const override; 758 759 StringRef mapDebugSectionName(StringRef Name) const override; 760 761 llvm::binaryformat::Swift5ReflectionSectionKind 762 mapReflectionSectionNameToEnumValue(StringRef SectionName) const override; 763 764 bool hasPageZeroSegment() const { return HasPageZeroSegment; } 765 766 size_t getMachOFilesetEntryOffset() const { return MachOFilesetEntryOffset; } 767 768 static bool classof(const Binary *v) { 769 return v->isMachO(); 770 } 771 772 static uint32_t 773 getVersionMinMajor(MachO::version_min_command &C, bool SDK) { 774 uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; 775 return (VersionOrSDK >> 16) & 0xffff; 776 } 777 778 static uint32_t 779 getVersionMinMinor(MachO::version_min_command &C, bool SDK) { 780 uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; 781 return (VersionOrSDK >> 8) & 0xff; 782 } 783 784 static uint32_t 785 getVersionMinUpdate(MachO::version_min_command &C, bool SDK) { 786 uint32_t VersionOrSDK = (SDK) ? 
C.sdk : C.version; 787 return VersionOrSDK & 0xff; 788 } 789 790 static std::string getBuildPlatform(uint32_t platform) { 791 switch (platform) { 792#define PLATFORM(platform, id, name, build_name, target, tapi_target, \ 793 marketing) \ 794 case MachO::PLATFORM_##platform: \ 795 return #name; 796#include "llvm/BinaryFormat/MachO.def" 797 default: 798 std::string ret; 799 raw_string_ostream ss(ret); 800 ss << format_hex(platform, 8, true); 801 return ss.str(); 802 } 803 } 804 805 static std::string getBuildTool(uint32_t tools) { 806 switch (tools) { 807 case MachO::TOOL_CLANG: return "clang"; 808 case MachO::TOOL_SWIFT: return "swift"; 809 case MachO::TOOL_LD: return "ld"; 810 case MachO::TOOL_LLD: 811 return "lld"; 812 default: 813 std::string ret; 814 raw_string_ostream ss(ret); 815 ss << format_hex(tools, 8, true); 816 return ss.str(); 817 } 818 } 819 820 static std::string getVersionString(uint32_t version) { 821 uint32_t major = (version >> 16) & 0xffff; 822 uint32_t minor = (version >> 8) & 0xff; 823 uint32_t update = version & 0xff; 824 825 SmallString<32> Version; 826 Version = utostr(major) + "." + utostr(minor); 827 if (update != 0) 828 Version += "." + utostr(update); 829 return std::string(std::string(Version)); 830 } 831 832 /// If the input path is a .dSYM bundle (as created by the dsymutil tool), 833 /// return the paths to the object files found in the bundle, otherwise return 834 /// an empty vector. If the path appears to be a .dSYM bundle but no objects 835 /// were found or there was a filesystem error, then return an error. 
836 static Expected<std::vector<std::string>> 837 findDsymObjectMembers(StringRef Path); 838 839private: 840 MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, 841 Error &Err, uint32_t UniversalCputype = 0, 842 uint32_t UniversalIndex = 0, 843 size_t MachOFilesetEntryOffset = 0); 844 845 uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; 846 847 union { 848 MachO::mach_header_64 Header64; 849 MachO::mach_header Header; 850 }; 851 using SectionList = SmallVector<const char*, 1>; 852 SectionList Sections; 853 using LibraryList = SmallVector<const char*, 1>; 854 LibraryList Libraries; 855 LoadCommandList LoadCommands; 856 using LibraryShortName = SmallVector<StringRef, 1>; 857 using BuildToolList = SmallVector<const char*, 1>; 858 BuildToolList BuildTools; 859 mutable LibraryShortName LibrariesShortNames; 860 std::unique_ptr<BindRebaseSegInfo> BindRebaseSectionTable; 861 const char *SymtabLoadCmd = nullptr; 862 const char *DysymtabLoadCmd = nullptr; 863 const char *DataInCodeLoadCmd = nullptr; 864 const char *LinkOptHintsLoadCmd = nullptr; 865 const char *DyldInfoLoadCmd = nullptr; 866 const char *FuncStartsLoadCmd = nullptr; 867 const char *DyldChainedFixupsLoadCmd = nullptr; 868 const char *DyldExportsTrieLoadCmd = nullptr; 869 const char *UuidLoadCmd = nullptr; 870 bool HasPageZeroSegment = false; 871 size_t MachOFilesetEntryOffset = 0; 872}; 873 874/// DiceRef 875inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner) 876 : DicePimpl(DiceP) , OwningObject(Owner) {} 877 878inline bool DiceRef::operator==(const DiceRef &Other) const { 879 return DicePimpl == Other.DicePimpl; 880} 881 882inline bool DiceRef::operator<(const DiceRef &Other) const { 883 return DicePimpl < Other.DicePimpl; 884} 885 886inline void DiceRef::moveNext() { 887 const MachO::data_in_code_entry *P = 888 reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p); 889 DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1); 890} 891 892// Since a 
Mach-O data in code reference, a DiceRef, can only be created when 893// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for 894// the methods that get the values of the fields of the reference. 895 896inline std::error_code DiceRef::getOffset(uint32_t &Result) const { 897 const MachOObjectFile *MachOOF = 898 static_cast<const MachOObjectFile *>(OwningObject); 899 MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); 900 Result = Dice.offset; 901 return std::error_code(); 902} 903 904inline std::error_code DiceRef::getLength(uint16_t &Result) const { 905 const MachOObjectFile *MachOOF = 906 static_cast<const MachOObjectFile *>(OwningObject); 907 MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); 908 Result = Dice.length; 909 return std::error_code(); 910} 911 912inline std::error_code DiceRef::getKind(uint16_t &Result) const { 913 const MachOObjectFile *MachOOF = 914 static_cast<const MachOObjectFile *>(OwningObject); 915 MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); 916 Result = Dice.kind; 917 return std::error_code(); 918} 919 920inline DataRefImpl DiceRef::getRawDataRefImpl() const { 921 return DicePimpl; 922} 923 924inline const ObjectFile *DiceRef::getObjectFile() const { 925 return OwningObject; 926} 927 928} // end namespace object 929} // end namespace llvm 930 931#endif // LLVM_OBJECT_MACHO_H 932