// MachOReader.cpp revision 360784
1//===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "MachOReader.h" 10#include "../llvm-objcopy.h" 11#include "Object.h" 12#include "llvm/BinaryFormat/MachO.h" 13#include "llvm/Object/MachO.h" 14#include <memory> 15 16namespace llvm { 17namespace objcopy { 18namespace macho { 19 20void MachOReader::readHeader(Object &O) const { 21 O.Header.Magic = MachOObj.getHeader().magic; 22 O.Header.CPUType = MachOObj.getHeader().cputype; 23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 24 O.Header.FileType = MachOObj.getHeader().filetype; 25 O.Header.NCmds = MachOObj.getHeader().ncmds; 26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 27 O.Header.Flags = MachOObj.getHeader().flags; 28} 29 30template <typename SectionType> 31Section constructSectionCommon(SectionType Sec) { 32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 34 Section S(SegName, SectName); 35 S.Addr = Sec.addr; 36 S.Size = Sec.size; 37 S.Offset = Sec.offset; 38 S.Align = Sec.align; 39 S.RelOff = Sec.reloff; 40 S.NReloc = Sec.nreloc; 41 S.Flags = Sec.flags; 42 S.Reserved1 = Sec.reserved1; 43 S.Reserved2 = Sec.reserved2; 44 S.Reserved3 = 0; 45 return S; 46} 47 48template <typename SectionType> Section constructSection(SectionType Sec); 49 50template <> Section constructSection(MachO::section Sec) { 51 return constructSectionCommon(Sec); 52} 53 54template <> Section constructSection(MachO::section_64 Sec) { 55 Section S = constructSectionCommon(Sec); 56 S.Reserved3 = Sec.reserved3; 57 return S; 58} 59 60// TODO: get rid of reportError and make MachOReader 
return Expected<> instead. 61template <typename SectionType, typename SegmentType> 62std::vector<Section> 63extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 64 const object::MachOObjectFile &MachOObj, 65 size_t &NextSectionIndex) { 66 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; 67 const SectionType *Curr = 68 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); 69 std::vector<Section> Sections; 70 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { 71 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 72 SectionType Sec; 73 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 74 MachO::swapStruct(Sec); 75 Sections.push_back(constructSection(Sec)); 76 } else { 77 Sections.push_back(constructSection(*Curr)); 78 } 79 80 Section &S = Sections.back(); 81 82 Expected<object::SectionRef> SecRef = 83 MachOObj.getSection(NextSectionIndex++); 84 if (!SecRef) 85 reportError(MachOObj.getFileName(), SecRef.takeError()); 86 87 if (Expected<ArrayRef<uint8_t>> E = 88 MachOObj.getSectionContents(SecRef->getRawDataRefImpl())) 89 S.Content = 90 StringRef(reinterpret_cast<const char *>(E->data()), E->size()); 91 else 92 reportError(MachOObj.getFileName(), E.takeError()); 93 94 S.Relocations.reserve(S.NReloc); 95 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 96 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 97 RI != RE; ++RI) { 98 RelocationInfo R; 99 R.Symbol = nullptr; // We'll fill this field later. 100 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 101 R.Scattered = MachOObj.isRelocationScattered(R.Info); 102 S.Relocations.push_back(R); 103 } 104 105 assert(S.NReloc == S.Relocations.size() && 106 "Incorrect number of relocations"); 107 } 108 return Sections; 109} 110 111void MachOReader::readLoadCommands(Object &O) const { 112 // For MachO sections indices start from 1. 
113 size_t NextSectionIndex = 1; 114 for (auto LoadCmd : MachOObj.load_commands()) { 115 LoadCommand LC; 116 switch (LoadCmd.C.cmd) { 117 case MachO::LC_SEGMENT: 118 LC.Sections = extractSections<MachO::section, MachO::segment_command>( 119 LoadCmd, MachOObj, NextSectionIndex); 120 break; 121 case MachO::LC_SEGMENT_64: 122 LC.Sections = 123 extractSections<MachO::section_64, MachO::segment_command_64>( 124 LoadCmd, MachOObj, NextSectionIndex); 125 break; 126 case MachO::LC_SYMTAB: 127 O.SymTabCommandIndex = O.LoadCommands.size(); 128 break; 129 case MachO::LC_DYSYMTAB: 130 O.DySymTabCommandIndex = O.LoadCommands.size(); 131 break; 132 case MachO::LC_DYLD_INFO: 133 case MachO::LC_DYLD_INFO_ONLY: 134 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 135 break; 136 case MachO::LC_DATA_IN_CODE: 137 O.DataInCodeCommandIndex = O.LoadCommands.size(); 138 break; 139 case MachO::LC_FUNCTION_STARTS: 140 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 141 break; 142 } 143#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 144 case MachO::LCName: \ 145 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 146 sizeof(MachO::LCStruct)); \ 147 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 148 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 149 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 150 LC.Payload = ArrayRef<uint8_t>( \ 151 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 152 sizeof(MachO::LCStruct), \ 153 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 154 break; 155 156 switch (LoadCmd.C.cmd) { 157 default: 158 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 159 sizeof(MachO::load_command)); 160 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 161 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 162 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 163 LC.Payload = ArrayRef<uint8_t>( 164 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 165 
sizeof(MachO::load_command), 166 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 167 break; 168#include "llvm/BinaryFormat/MachO.def" 169 } 170 O.LoadCommands.push_back(std::move(LC)); 171 } 172} 173 174template <typename nlist_t> 175SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 176 assert(nlist.n_strx < StrTable.size() && 177 "n_strx exceeds the size of the string table"); 178 SymbolEntry SE; 179 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 180 SE.n_type = nlist.n_type; 181 SE.n_sect = nlist.n_sect; 182 SE.n_desc = nlist.n_desc; 183 SE.n_value = nlist.n_value; 184 return SE; 185} 186 187void MachOReader::readSymbolTable(Object &O) const { 188 StringRef StrTable = MachOObj.getStringTableData(); 189 for (auto Symbol : MachOObj.symbols()) { 190 SymbolEntry SE = 191 (MachOObj.is64Bit() 192 ? constructSymbolEntry( 193 StrTable, 194 MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) 195 : constructSymbolEntry( 196 StrTable, 197 MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); 198 199 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 200 } 201} 202 203void MachOReader::setSymbolInRelocationInfo(Object &O) const { 204 for (auto &LC : O.LoadCommands) 205 for (auto &Sec : LC.Sections) 206 for (auto &Reloc : Sec.Relocations) 207 if (!Reloc.Scattered) { 208 auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info); 209 Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); 210 } 211} 212 213void MachOReader::readRebaseInfo(Object &O) const { 214 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 215} 216 217void MachOReader::readBindInfo(Object &O) const { 218 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 219} 220 221void MachOReader::readWeakBindInfo(Object &O) const { 222 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 223} 224 225void MachOReader::readLazyBindInfo(Object &O) const { 226 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 
227} 228 229void MachOReader::readExportInfo(Object &O) const { 230 O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); 231} 232 233void MachOReader::readDataInCodeData(Object &O) const { 234 if (!O.DataInCodeCommandIndex) 235 return; 236 const MachO::linkedit_data_command &LDC = 237 O.LoadCommands[*O.DataInCodeCommandIndex] 238 .MachOLoadCommand.linkedit_data_command_data; 239 240 O.DataInCode.Data = arrayRefFromStringRef( 241 MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); 242} 243 244void MachOReader::readFunctionStartsData(Object &O) const { 245 if (!O.FunctionStartsCommandIndex) 246 return; 247 const MachO::linkedit_data_command &LDC = 248 O.LoadCommands[*O.FunctionStartsCommandIndex] 249 .MachOLoadCommand.linkedit_data_command_data; 250 251 O.FunctionStarts.Data = arrayRefFromStringRef( 252 MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); 253} 254 255void MachOReader::readIndirectSymbolTable(Object &O) const { 256 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 257 constexpr uint32_t AbsOrLocalMask = 258 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 259 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 260 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 261 if ((Index & AbsOrLocalMask) != 0) 262 O.IndirectSymTable.Symbols.emplace_back(Index, None); 263 else 264 O.IndirectSymTable.Symbols.emplace_back( 265 Index, O.SymTable.getSymbolByIndex(Index)); 266 } 267} 268 269std::unique_ptr<Object> MachOReader::create() const { 270 auto Obj = std::make_unique<Object>(); 271 readHeader(*Obj); 272 readLoadCommands(*Obj); 273 readSymbolTable(*Obj); 274 setSymbolInRelocationInfo(*Obj); 275 readRebaseInfo(*Obj); 276 readBindInfo(*Obj); 277 readWeakBindInfo(*Obj); 278 readLazyBindInfo(*Obj); 279 readExportInfo(*Obj); 280 readDataInCodeData(*Obj); 281 readFunctionStartsData(*Obj); 282 readIndirectSymbolTable(*Obj); 283 return Obj; 284} 285 286} // end namespace macho 287} // end namespace objcopy 288} 
// end namespace llvm 289