MachOReader.cpp revision 360784
1//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOReader.h"
10#include "../llvm-objcopy.h"
11#include "Object.h"
12#include "llvm/BinaryFormat/MachO.h"
13#include "llvm/Object/MachO.h"
14#include <memory>
15
16namespace llvm {
17namespace objcopy {
18namespace macho {
19
20void MachOReader::readHeader(Object &O) const {
21  O.Header.Magic = MachOObj.getHeader().magic;
22  O.Header.CPUType = MachOObj.getHeader().cputype;
23  O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
24  O.Header.FileType = MachOObj.getHeader().filetype;
25  O.Header.NCmds = MachOObj.getHeader().ncmds;
26  O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
27  O.Header.Flags = MachOObj.getHeader().flags;
28}
29
30template <typename SectionType>
31Section constructSectionCommon(SectionType Sec) {
32  StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
33  StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
34  Section S(SegName, SectName);
35  S.Addr = Sec.addr;
36  S.Size = Sec.size;
37  S.Offset = Sec.offset;
38  S.Align = Sec.align;
39  S.RelOff = Sec.reloff;
40  S.NReloc = Sec.nreloc;
41  S.Flags = Sec.flags;
42  S.Reserved1 = Sec.reserved1;
43  S.Reserved2 = Sec.reserved2;
44  S.Reserved3 = 0;
45  return S;
46}
47
48template <typename SectionType> Section constructSection(SectionType Sec);
49
50template <> Section constructSection(MachO::section Sec) {
51  return constructSectionCommon(Sec);
52}
53
54template <> Section constructSection(MachO::section_64 Sec) {
55  Section S = constructSectionCommon(Sec);
56  S.Reserved3 = Sec.reserved3;
57  return S;
58}
59
60// TODO: get rid of reportError and make MachOReader return Expected<> instead.
61template <typename SectionType, typename SegmentType>
62std::vector<Section>
63extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
64                const object::MachOObjectFile &MachOObj,
65                size_t &NextSectionIndex) {
66  auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
67  const SectionType *Curr =
68      reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
69  std::vector<Section> Sections;
70  for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
71    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
72      SectionType Sec;
73      memcpy((void *)&Sec, Curr, sizeof(SectionType));
74      MachO::swapStruct(Sec);
75      Sections.push_back(constructSection(Sec));
76    } else {
77      Sections.push_back(constructSection(*Curr));
78    }
79
80    Section &S = Sections.back();
81
82    Expected<object::SectionRef> SecRef =
83        MachOObj.getSection(NextSectionIndex++);
84    if (!SecRef)
85      reportError(MachOObj.getFileName(), SecRef.takeError());
86
87    if (Expected<ArrayRef<uint8_t>> E =
88            MachOObj.getSectionContents(SecRef->getRawDataRefImpl()))
89      S.Content =
90          StringRef(reinterpret_cast<const char *>(E->data()), E->size());
91    else
92      reportError(MachOObj.getFileName(), E.takeError());
93
94    S.Relocations.reserve(S.NReloc);
95    for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
96              RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
97         RI != RE; ++RI) {
98      RelocationInfo R;
99      R.Symbol = nullptr; // We'll fill this field later.
100      R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
101      R.Scattered = MachOObj.isRelocationScattered(R.Info);
102      S.Relocations.push_back(R);
103    }
104
105    assert(S.NReloc == S.Relocations.size() &&
106           "Incorrect number of relocations");
107  }
108  return Sections;
109}
110
111void MachOReader::readLoadCommands(Object &O) const {
112  // For MachO sections indices start from 1.
113  size_t NextSectionIndex = 1;
114  for (auto LoadCmd : MachOObj.load_commands()) {
115    LoadCommand LC;
116    switch (LoadCmd.C.cmd) {
117    case MachO::LC_SEGMENT:
118      LC.Sections = extractSections<MachO::section, MachO::segment_command>(
119          LoadCmd, MachOObj, NextSectionIndex);
120      break;
121    case MachO::LC_SEGMENT_64:
122      LC.Sections =
123          extractSections<MachO::section_64, MachO::segment_command_64>(
124              LoadCmd, MachOObj, NextSectionIndex);
125      break;
126    case MachO::LC_SYMTAB:
127      O.SymTabCommandIndex = O.LoadCommands.size();
128      break;
129    case MachO::LC_DYSYMTAB:
130      O.DySymTabCommandIndex = O.LoadCommands.size();
131      break;
132    case MachO::LC_DYLD_INFO:
133    case MachO::LC_DYLD_INFO_ONLY:
134      O.DyLdInfoCommandIndex = O.LoadCommands.size();
135      break;
136    case MachO::LC_DATA_IN_CODE:
137      O.DataInCodeCommandIndex = O.LoadCommands.size();
138      break;
139    case MachO::LC_FUNCTION_STARTS:
140      O.FunctionStartsCommandIndex = O.LoadCommands.size();
141      break;
142    }
143#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
144  case MachO::LCName:                                                          \
145    memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
146           sizeof(MachO::LCStruct));                                           \
147    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
148      MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
149    if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
150      LC.Payload = ArrayRef<uint8_t>(                                          \
151          reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
152              sizeof(MachO::LCStruct),                                         \
153          LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
154    break;
155
156    switch (LoadCmd.C.cmd) {
157    default:
158      memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
159             sizeof(MachO::load_command));
160      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
161        MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
162      if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
163        LC.Payload = ArrayRef<uint8_t>(
164            reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
165                sizeof(MachO::load_command),
166            LoadCmd.C.cmdsize - sizeof(MachO::load_command));
167      break;
168#include "llvm/BinaryFormat/MachO.def"
169    }
170    O.LoadCommands.push_back(std::move(LC));
171  }
172}
173
174template <typename nlist_t>
175SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
176  assert(nlist.n_strx < StrTable.size() &&
177         "n_strx exceeds the size of the string table");
178  SymbolEntry SE;
179  SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
180  SE.n_type = nlist.n_type;
181  SE.n_sect = nlist.n_sect;
182  SE.n_desc = nlist.n_desc;
183  SE.n_value = nlist.n_value;
184  return SE;
185}
186
187void MachOReader::readSymbolTable(Object &O) const {
188  StringRef StrTable = MachOObj.getStringTableData();
189  for (auto Symbol : MachOObj.symbols()) {
190    SymbolEntry SE =
191        (MachOObj.is64Bit()
192             ? constructSymbolEntry(
193                   StrTable,
194                   MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
195             : constructSymbolEntry(
196                   StrTable,
197                   MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
198
199    O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
200  }
201}
202
203void MachOReader::setSymbolInRelocationInfo(Object &O) const {
204  for (auto &LC : O.LoadCommands)
205    for (auto &Sec : LC.Sections)
206      for (auto &Reloc : Sec.Relocations)
207        if (!Reloc.Scattered) {
208          auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info);
209          Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum);
210        }
211}
212
213void MachOReader::readRebaseInfo(Object &O) const {
214  O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
215}
216
217void MachOReader::readBindInfo(Object &O) const {
218  O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
219}
220
221void MachOReader::readWeakBindInfo(Object &O) const {
222  O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
223}
224
225void MachOReader::readLazyBindInfo(Object &O) const {
226  O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
227}
228
229void MachOReader::readExportInfo(Object &O) const {
230  O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
231}
232
233void MachOReader::readDataInCodeData(Object &O) const {
234  if (!O.DataInCodeCommandIndex)
235    return;
236  const MachO::linkedit_data_command &LDC =
237      O.LoadCommands[*O.DataInCodeCommandIndex]
238          .MachOLoadCommand.linkedit_data_command_data;
239
240  O.DataInCode.Data = arrayRefFromStringRef(
241      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
242}
243
244void MachOReader::readFunctionStartsData(Object &O) const {
245  if (!O.FunctionStartsCommandIndex)
246    return;
247  const MachO::linkedit_data_command &LDC =
248      O.LoadCommands[*O.FunctionStartsCommandIndex]
249          .MachOLoadCommand.linkedit_data_command_data;
250
251  O.FunctionStarts.Data = arrayRefFromStringRef(
252      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
253}
254
255void MachOReader::readIndirectSymbolTable(Object &O) const {
256  MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
257  constexpr uint32_t AbsOrLocalMask =
258      MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
259  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
260    uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
261    if ((Index & AbsOrLocalMask) != 0)
262      O.IndirectSymTable.Symbols.emplace_back(Index, None);
263    else
264      O.IndirectSymTable.Symbols.emplace_back(
265          Index, O.SymTable.getSymbolByIndex(Index));
266  }
267}
268
269std::unique_ptr<Object> MachOReader::create() const {
270  auto Obj = std::make_unique<Object>();
271  readHeader(*Obj);
272  readLoadCommands(*Obj);
273  readSymbolTable(*Obj);
274  setSymbolInRelocationInfo(*Obj);
275  readRebaseInfo(*Obj);
276  readBindInfo(*Obj);
277  readWeakBindInfo(*Obj);
278  readLazyBindInfo(*Obj);
279  readExportInfo(*Obj);
280  readDataInCodeData(*Obj);
281  readFunctionStartsData(*Obj);
282  readIndirectSymbolTable(*Obj);
283  return Obj;
284}
285
286} // end namespace macho
287} // end namespace objcopy
288} // end namespace llvm
289