MachOWriter.cpp revision 360784
1//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOWriter.h"
10#include "MachOLayoutBuilder.h"
11#include "Object.h"
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/BinaryFormat/MachO.h"
14#include "llvm/Object/MachO.h"
15#include "llvm/Support/Errc.h"
16#include "llvm/Support/ErrorHandling.h"
17#include <memory>
18
19namespace llvm {
20namespace objcopy {
21namespace macho {
22
23size_t MachOWriter::headerSize() const {
24  return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
25}
26
27size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
28
29size_t MachOWriter::symTableSize() const {
30  return O.SymTable.Symbols.size() *
31         (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
32}
33
34size_t MachOWriter::totalSize() const {
35  // Going from tail to head and looking for an appropriate "anchor" to
36  // calculate the total size assuming that all the offsets are either valid
37  // ("true") or 0 (0 indicates that the corresponding part is missing).
38
39  SmallVector<size_t, 7> Ends;
40  if (O.SymTabCommandIndex) {
41    const MachO::symtab_command &SymTabCommand =
42        O.LoadCommands[*O.SymTabCommandIndex]
43            .MachOLoadCommand.symtab_command_data;
44    if (SymTabCommand.symoff)
45      Ends.push_back(SymTabCommand.symoff + symTableSize());
46    if (SymTabCommand.stroff)
47      Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
48  }
49  if (O.DyLdInfoCommandIndex) {
50    const MachO::dyld_info_command &DyLdInfoCommand =
51        O.LoadCommands[*O.DyLdInfoCommandIndex]
52            .MachOLoadCommand.dyld_info_command_data;
53    if (DyLdInfoCommand.rebase_off) {
54      assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
55             "Incorrect rebase opcodes size");
56      Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
57    }
58    if (DyLdInfoCommand.bind_off) {
59      assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
60             "Incorrect bind opcodes size");
61      Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
62    }
63    if (DyLdInfoCommand.weak_bind_off) {
64      assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
65             "Incorrect weak bind opcodes size");
66      Ends.push_back(DyLdInfoCommand.weak_bind_off +
67                     DyLdInfoCommand.weak_bind_size);
68    }
69    if (DyLdInfoCommand.lazy_bind_off) {
70      assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
71             "Incorrect lazy bind opcodes size");
72      Ends.push_back(DyLdInfoCommand.lazy_bind_off +
73                     DyLdInfoCommand.lazy_bind_size);
74    }
75    if (DyLdInfoCommand.export_off) {
76      assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
77             "Incorrect trie size");
78      Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
79    }
80  }
81
82  if (O.DySymTabCommandIndex) {
83    const MachO::dysymtab_command &DySymTabCommand =
84        O.LoadCommands[*O.DySymTabCommandIndex]
85            .MachOLoadCommand.dysymtab_command_data;
86
87    if (DySymTabCommand.indirectsymoff)
88      Ends.push_back(DySymTabCommand.indirectsymoff +
89                     sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
90  }
91
92  if (O.DataInCodeCommandIndex) {
93    const MachO::linkedit_data_command &LinkEditDataCommand =
94        O.LoadCommands[*O.DataInCodeCommandIndex]
95            .MachOLoadCommand.linkedit_data_command_data;
96
97    if (LinkEditDataCommand.dataoff)
98      Ends.push_back(LinkEditDataCommand.dataoff +
99                     LinkEditDataCommand.datasize);
100  }
101
102  if (O.FunctionStartsCommandIndex) {
103    const MachO::linkedit_data_command &LinkEditDataCommand =
104        O.LoadCommands[*O.FunctionStartsCommandIndex]
105            .MachOLoadCommand.linkedit_data_command_data;
106
107    if (LinkEditDataCommand.dataoff)
108      Ends.push_back(LinkEditDataCommand.dataoff +
109                     LinkEditDataCommand.datasize);
110  }
111
112  // Otherwise, use the last section / reloction.
113  for (const auto &LC : O.LoadCommands)
114    for (const auto &S : LC.Sections) {
115      Ends.push_back(S.Offset + S.Size);
116      if (S.RelOff)
117        Ends.push_back(S.RelOff +
118                       S.NReloc * sizeof(MachO::any_relocation_info));
119    }
120
121  if (!Ends.empty())
122    return *std::max_element(Ends.begin(), Ends.end());
123
124  // Otherwise, we have only Mach header and load commands.
125  return headerSize() + loadCommandsSize();
126}
127
128void MachOWriter::writeHeader() {
129  MachO::mach_header_64 Header;
130
131  Header.magic = O.Header.Magic;
132  Header.cputype = O.Header.CPUType;
133  Header.cpusubtype = O.Header.CPUSubType;
134  Header.filetype = O.Header.FileType;
135  Header.ncmds = O.Header.NCmds;
136  Header.sizeofcmds = O.Header.SizeOfCmds;
137  Header.flags = O.Header.Flags;
138  Header.reserved = O.Header.Reserved;
139
140  if (IsLittleEndian != sys::IsLittleEndianHost)
141    MachO::swapStruct(Header);
142
143  auto HeaderSize =
144      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
145  memcpy(B.getBufferStart(), &Header, HeaderSize);
146}
147
148void MachOWriter::writeLoadCommands() {
149  uint8_t *Begin = B.getBufferStart() + headerSize();
150  for (const auto &LC : O.LoadCommands) {
151    // Construct a load command.
152    MachO::macho_load_command MLC = LC.MachOLoadCommand;
153    switch (MLC.load_command_data.cmd) {
154    case MachO::LC_SEGMENT:
155      if (IsLittleEndian != sys::IsLittleEndianHost)
156        MachO::swapStruct(MLC.segment_command_data);
157      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
158      Begin += sizeof(MachO::segment_command);
159
160      for (const auto &Sec : LC.Sections)
161        writeSectionInLoadCommand<MachO::section>(Sec, Begin);
162      continue;
163    case MachO::LC_SEGMENT_64:
164      if (IsLittleEndian != sys::IsLittleEndianHost)
165        MachO::swapStruct(MLC.segment_command_64_data);
166      memcpy(Begin, &MLC.segment_command_64_data,
167             sizeof(MachO::segment_command_64));
168      Begin += sizeof(MachO::segment_command_64);
169
170      for (const auto &Sec : LC.Sections)
171        writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
172      continue;
173    }
174
175#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
176  case MachO::LCName:                                                          \
177    assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
178           MLC.load_command_data.cmdsize);                                     \
179    if (IsLittleEndian != sys::IsLittleEndianHost)                             \
180      MachO::swapStruct(MLC.LCStruct##_data);                                  \
181    memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
182    Begin += sizeof(MachO::LCStruct);                                          \
183    if (!LC.Payload.empty())                                                   \
184      memcpy(Begin, LC.Payload.data(), LC.Payload.size());                     \
185    Begin += LC.Payload.size();                                                \
186    break;
187
188    // Copy the load command as it is.
189    switch (MLC.load_command_data.cmd) {
190    default:
191      assert(sizeof(MachO::load_command) + LC.Payload.size() ==
192             MLC.load_command_data.cmdsize);
193      if (IsLittleEndian != sys::IsLittleEndianHost)
194        MachO::swapStruct(MLC.load_command_data);
195      memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
196      Begin += sizeof(MachO::load_command);
197      if (!LC.Payload.empty())
198        memcpy(Begin, LC.Payload.data(), LC.Payload.size());
199      Begin += LC.Payload.size();
200      break;
201#include "llvm/BinaryFormat/MachO.def"
202    }
203  }
204}
205
206template <typename StructType>
207void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
208  StructType Temp;
209  assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
210  assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
211         "too long section name");
212  memset(&Temp, 0, sizeof(StructType));
213  memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
214  memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
215  Temp.addr = Sec.Addr;
216  Temp.size = Sec.Size;
217  Temp.offset = Sec.Offset;
218  Temp.align = Sec.Align;
219  Temp.reloff = Sec.RelOff;
220  Temp.nreloc = Sec.NReloc;
221  Temp.flags = Sec.Flags;
222  Temp.reserved1 = Sec.Reserved1;
223  Temp.reserved2 = Sec.Reserved2;
224
225  if (IsLittleEndian != sys::IsLittleEndianHost)
226    MachO::swapStruct(Temp);
227  memcpy(Out, &Temp, sizeof(StructType));
228  Out += sizeof(StructType);
229}
230
231void MachOWriter::writeSections() {
232  for (const auto &LC : O.LoadCommands)
233    for (const auto &Sec : LC.Sections) {
234      if (Sec.isVirtualSection())
235        continue;
236
237      assert(Sec.Offset && "Section offset can not be zero");
238      assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
239      memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
240             Sec.Content.size());
241      for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
242        auto RelocInfo = Sec.Relocations[Index];
243        if (!RelocInfo.Scattered) {
244          auto *Info =
245              reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
246          Info->r_symbolnum = RelocInfo.Symbol->Index;
247        }
248
249        if (IsLittleEndian != sys::IsLittleEndianHost)
250          MachO::swapStruct(
251              reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
252        memcpy(B.getBufferStart() + Sec.RelOff +
253                   Index * sizeof(MachO::any_relocation_info),
254               &RelocInfo.Info, sizeof(RelocInfo.Info));
255      }
256    }
257}
258
259template <typename NListType>
260void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
261                     uint32_t Nstrx) {
262  NListType ListEntry;
263  ListEntry.n_strx = Nstrx;
264  ListEntry.n_type = SE.n_type;
265  ListEntry.n_sect = SE.n_sect;
266  ListEntry.n_desc = SE.n_desc;
267  ListEntry.n_value = SE.n_value;
268
269  if (IsLittleEndian != sys::IsLittleEndianHost)
270    MachO::swapStruct(ListEntry);
271  memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
272  Out += sizeof(NListType);
273}
274
275void MachOWriter::writeStringTable() {
276  if (!O.SymTabCommandIndex)
277    return;
278  const MachO::symtab_command &SymTabCommand =
279      O.LoadCommands[*O.SymTabCommandIndex]
280          .MachOLoadCommand.symtab_command_data;
281
282  uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
283  LayoutBuilder.getStringTableBuilder().write(StrTable);
284}
285
286void MachOWriter::writeSymbolTable() {
287  if (!O.SymTabCommandIndex)
288    return;
289  const MachO::symtab_command &SymTabCommand =
290      O.LoadCommands[*O.SymTabCommandIndex]
291          .MachOLoadCommand.symtab_command_data;
292
293  char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
294  for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
295       Iter != End; Iter++) {
296    SymbolEntry *Sym = Iter->get();
297    uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
298
299    if (Is64Bit)
300      writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
301    else
302      writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
303  }
304}
305
306void MachOWriter::writeRebaseInfo() {
307  if (!O.DyLdInfoCommandIndex)
308    return;
309  const MachO::dyld_info_command &DyLdInfoCommand =
310      O.LoadCommands[*O.DyLdInfoCommandIndex]
311          .MachOLoadCommand.dyld_info_command_data;
312  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off;
313  assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
314         "Incorrect rebase opcodes size");
315  memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
316}
317
318void MachOWriter::writeBindInfo() {
319  if (!O.DyLdInfoCommandIndex)
320    return;
321  const MachO::dyld_info_command &DyLdInfoCommand =
322      O.LoadCommands[*O.DyLdInfoCommandIndex]
323          .MachOLoadCommand.dyld_info_command_data;
324  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off;
325  assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
326         "Incorrect bind opcodes size");
327  memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
328}
329
330void MachOWriter::writeWeakBindInfo() {
331  if (!O.DyLdInfoCommandIndex)
332    return;
333  const MachO::dyld_info_command &DyLdInfoCommand =
334      O.LoadCommands[*O.DyLdInfoCommandIndex]
335          .MachOLoadCommand.dyld_info_command_data;
336  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off;
337  assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
338         "Incorrect weak bind opcodes size");
339  memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
340}
341
342void MachOWriter::writeLazyBindInfo() {
343  if (!O.DyLdInfoCommandIndex)
344    return;
345  const MachO::dyld_info_command &DyLdInfoCommand =
346      O.LoadCommands[*O.DyLdInfoCommandIndex]
347          .MachOLoadCommand.dyld_info_command_data;
348  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off;
349  assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
350         "Incorrect lazy bind opcodes size");
351  memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
352}
353
354void MachOWriter::writeExportInfo() {
355  if (!O.DyLdInfoCommandIndex)
356    return;
357  const MachO::dyld_info_command &DyLdInfoCommand =
358      O.LoadCommands[*O.DyLdInfoCommandIndex]
359          .MachOLoadCommand.dyld_info_command_data;
360  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off;
361  assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
362         "Incorrect export trie size");
363  memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
364}
365
366void MachOWriter::writeIndirectSymbolTable() {
367  if (!O.DySymTabCommandIndex)
368    return;
369
370  const MachO::dysymtab_command &DySymTabCommand =
371      O.LoadCommands[*O.DySymTabCommandIndex]
372          .MachOLoadCommand.dysymtab_command_data;
373
374  uint32_t *Out =
375      (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff);
376  for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) {
377    uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex;
378    if (IsLittleEndian != sys::IsLittleEndianHost)
379      sys::swapByteOrder(Entry);
380    *Out++ = Entry;
381  }
382}
383
384void MachOWriter::writeDataInCodeData() {
385  if (!O.DataInCodeCommandIndex)
386    return;
387  const MachO::linkedit_data_command &LinkEditDataCommand =
388      O.LoadCommands[*O.DataInCodeCommandIndex]
389          .MachOLoadCommand.linkedit_data_command_data;
390  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
391  assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
392         "Incorrect data in code data size");
393  memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
394}
395
396void MachOWriter::writeFunctionStartsData() {
397  if (!O.FunctionStartsCommandIndex)
398    return;
399  const MachO::linkedit_data_command &LinkEditDataCommand =
400      O.LoadCommands[*O.FunctionStartsCommandIndex]
401          .MachOLoadCommand.linkedit_data_command_data;
402  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
403  assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
404         "Incorrect function starts data size");
405  memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
406}
407
408void MachOWriter::writeTail() {
409  typedef void (MachOWriter::*WriteHandlerType)(void);
410  typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
411  SmallVector<WriteOperation, 7> Queue;
412
413  if (O.SymTabCommandIndex) {
414    const MachO::symtab_command &SymTabCommand =
415        O.LoadCommands[*O.SymTabCommandIndex]
416            .MachOLoadCommand.symtab_command_data;
417    if (SymTabCommand.symoff)
418      Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable});
419    if (SymTabCommand.stroff)
420      Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable});
421  }
422
423  if (O.DyLdInfoCommandIndex) {
424    const MachO::dyld_info_command &DyLdInfoCommand =
425        O.LoadCommands[*O.DyLdInfoCommandIndex]
426            .MachOLoadCommand.dyld_info_command_data;
427    if (DyLdInfoCommand.rebase_off)
428      Queue.push_back(
429          {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo});
430    if (DyLdInfoCommand.bind_off)
431      Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo});
432    if (DyLdInfoCommand.weak_bind_off)
433      Queue.push_back(
434          {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo});
435    if (DyLdInfoCommand.lazy_bind_off)
436      Queue.push_back(
437          {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo});
438    if (DyLdInfoCommand.export_off)
439      Queue.push_back(
440          {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
441  }
442
443  if (O.DySymTabCommandIndex) {
444    const MachO::dysymtab_command &DySymTabCommand =
445        O.LoadCommands[*O.DySymTabCommandIndex]
446            .MachOLoadCommand.dysymtab_command_data;
447
448    if (DySymTabCommand.indirectsymoff)
449      Queue.emplace_back(DySymTabCommand.indirectsymoff,
450                         &MachOWriter::writeIndirectSymbolTable);
451  }
452
453  if (O.DataInCodeCommandIndex) {
454    const MachO::linkedit_data_command &LinkEditDataCommand =
455        O.LoadCommands[*O.DataInCodeCommandIndex]
456            .MachOLoadCommand.linkedit_data_command_data;
457
458    if (LinkEditDataCommand.dataoff)
459      Queue.emplace_back(LinkEditDataCommand.dataoff,
460                         &MachOWriter::writeDataInCodeData);
461  }
462
463  if (O.FunctionStartsCommandIndex) {
464    const MachO::linkedit_data_command &LinkEditDataCommand =
465        O.LoadCommands[*O.FunctionStartsCommandIndex]
466            .MachOLoadCommand.linkedit_data_command_data;
467
468    if (LinkEditDataCommand.dataoff)
469      Queue.emplace_back(LinkEditDataCommand.dataoff,
470                         &MachOWriter::writeFunctionStartsData);
471  }
472
473  llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
474    return LHS.first < RHS.first;
475  });
476
477  for (auto WriteOp : Queue)
478    (this->*WriteOp.second)();
479}
480
481Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
482
483Error MachOWriter::write() {
484  if (Error E = B.allocate(totalSize()))
485    return E;
486  memset(B.getBufferStart(), 0, totalSize());
487  writeHeader();
488  writeLoadCommands();
489  writeSections();
490  writeTail();
491  return B.commit();
492}
493
494} // end namespace macho
495} // end namespace objcopy
496} // end namespace llvm
497