MachObjectWriter.cpp revision 360784
1//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/DenseMap.h"
10#include "llvm/ADT/Twine.h"
11#include "llvm/ADT/iterator_range.h"
12#include "llvm/BinaryFormat/MachO.h"
13#include "llvm/MC/MCAsmBackend.h"
14#include "llvm/MC/MCAsmLayout.h"
15#include "llvm/MC/MCAssembler.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/MC/MCDirectives.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCFixupKindInfo.h"
20#include "llvm/MC/MCFragment.h"
21#include "llvm/MC/MCMachObjectWriter.h"
22#include "llvm/MC/MCObjectWriter.h"
23#include "llvm/MC/MCSection.h"
24#include "llvm/MC/MCSectionMachO.h"
25#include "llvm/MC/MCSymbol.h"
26#include "llvm/MC/MCSymbolMachO.h"
27#include "llvm/MC/MCValue.h"
28#include "llvm/Support/Alignment.h"
29#include "llvm/Support/Casting.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/MathExtras.h"
33#include "llvm/Support/raw_ostream.h"
34#include <algorithm>
35#include <cassert>
36#include <cstdint>
37#include <string>
38#include <utility>
39#include <vector>
40
41using namespace llvm;
42
43#define DEBUG_TYPE "mc"
44
45void MachObjectWriter::reset() {
46  Relocations.clear();
47  IndirectSymBase.clear();
48  StringTable.clear();
49  LocalSymbolData.clear();
50  ExternalSymbolData.clear();
51  UndefinedSymbolData.clear();
52  MCObjectWriter::reset();
53}
54
55bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
56  // Undefined symbols are always extern.
57  if (S.isUndefined())
58    return true;
59
60  // References to weak definitions require external relocation entries; the
61  // definition may not always be the one in the same object file.
62  if (cast<MCSymbolMachO>(S).isWeakDefinition())
63    return true;
64
65  // Otherwise, we can use an internal relocation.
66  return false;
67}
68
69bool MachObjectWriter::
70MachSymbolData::operator<(const MachSymbolData &RHS) const {
71  return Symbol->getName() < RHS.Symbol->getName();
72}
73
74bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
75  const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
76    (MCFixupKind) Kind);
77
78  return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
79}
80
81uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
82                                              const MCAsmLayout &Layout) const {
83  return getSectionAddress(Fragment->getParent()) +
84         Layout.getFragmentOffset(Fragment);
85}
86
87uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
88                                            const MCAsmLayout &Layout) const {
89  // If this is a variable, then recursively evaluate now.
90  if (S.isVariable()) {
91    if (const MCConstantExpr *C =
92          dyn_cast<const MCConstantExpr>(S.getVariableValue()))
93      return C->getValue();
94
95    MCValue Target;
96    if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
97      report_fatal_error("unable to evaluate offset for variable '" +
98                         S.getName() + "'");
99
100    // Verify that any used symbols are defined.
101    if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
102      report_fatal_error("unable to evaluate offset to undefined symbol '" +
103                         Target.getSymA()->getSymbol().getName() + "'");
104    if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
105      report_fatal_error("unable to evaluate offset to undefined symbol '" +
106                         Target.getSymB()->getSymbol().getName() + "'");
107
108    uint64_t Address = Target.getConstant();
109    if (Target.getSymA())
110      Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
111    if (Target.getSymB())
112      Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
113    return Address;
114  }
115
116  return getSectionAddress(S.getFragment()->getParent()) +
117         Layout.getSymbolOffset(S);
118}
119
120uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
121                                          const MCAsmLayout &Layout) const {
122  uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
123  unsigned Next = Sec->getLayoutOrder() + 1;
124  if (Next >= Layout.getSectionOrder().size())
125    return 0;
126
127  const MCSection &NextSec = *Layout.getSectionOrder()[Next];
128  if (NextSec.isVirtualSection())
129    return 0;
130  return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
131}
132
133void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
134                                   unsigned NumLoadCommands,
135                                   unsigned LoadCommandsSize,
136                                   bool SubsectionsViaSymbols) {
137  uint32_t Flags = 0;
138
139  if (SubsectionsViaSymbols)
140    Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
141
142  // struct mach_header (28 bytes) or
143  // struct mach_header_64 (32 bytes)
144
145  uint64_t Start = W.OS.tell();
146  (void) Start;
147
148  W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
149
150  W.write<uint32_t>(TargetObjectWriter->getCPUType());
151  W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
152
153  W.write<uint32_t>(Type);
154  W.write<uint32_t>(NumLoadCommands);
155  W.write<uint32_t>(LoadCommandsSize);
156  W.write<uint32_t>(Flags);
157  if (is64Bit())
158    W.write<uint32_t>(0); // reserved
159
160  assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
161                                           : sizeof(MachO::mach_header)));
162}
163
164void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
165  assert(Size >= Str.size());
166  W.OS << Str;
167  W.OS.write_zeros(Size - Str.size());
168}
169
170/// writeSegmentLoadCommand - Write a segment load command.
171///
172/// \param NumSections The number of sections in this segment.
173/// \param SectionDataSize The total size of the sections.
174void MachObjectWriter::writeSegmentLoadCommand(
175    StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
176    uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
177    uint32_t InitProt) {
178  // struct segment_command (56 bytes) or
179  // struct segment_command_64 (72 bytes)
180
181  uint64_t Start = W.OS.tell();
182  (void) Start;
183
184  unsigned SegmentLoadCommandSize =
185    is64Bit() ? sizeof(MachO::segment_command_64):
186    sizeof(MachO::segment_command);
187  W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
188  W.write<uint32_t>(SegmentLoadCommandSize +
189          NumSections * (is64Bit() ? sizeof(MachO::section_64) :
190                         sizeof(MachO::section)));
191
192  writeWithPadding(Name, 16);
193  if (is64Bit()) {
194    W.write<uint64_t>(VMAddr);                 // vmaddr
195    W.write<uint64_t>(VMSize); // vmsize
196    W.write<uint64_t>(SectionDataStartOffset); // file offset
197    W.write<uint64_t>(SectionDataSize); // file size
198  } else {
199    W.write<uint32_t>(VMAddr);                 // vmaddr
200    W.write<uint32_t>(VMSize); // vmsize
201    W.write<uint32_t>(SectionDataStartOffset); // file offset
202    W.write<uint32_t>(SectionDataSize); // file size
203  }
204  // maxprot
205  W.write<uint32_t>(MaxProt);
206  // initprot
207  W.write<uint32_t>(InitProt);
208  W.write<uint32_t>(NumSections);
209  W.write<uint32_t>(0); // flags
210
211  assert(W.OS.tell() - Start == SegmentLoadCommandSize);
212}
213
214void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
215                                    const MCSection &Sec, uint64_t VMAddr,
216                                    uint64_t FileOffset, unsigned Flags,
217                                    uint64_t RelocationsStart,
218                                    unsigned NumRelocations) {
219  uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
220  const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
221
222  // The offset is unused for virtual sections.
223  if (Section.isVirtualSection()) {
224    assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
225    FileOffset = 0;
226  }
227
228  // struct section (68 bytes) or
229  // struct section_64 (80 bytes)
230
231  uint64_t Start = W.OS.tell();
232  (void) Start;
233
234  writeWithPadding(Section.getSectionName(), 16);
235  writeWithPadding(Section.getSegmentName(), 16);
236  if (is64Bit()) {
237    W.write<uint64_t>(VMAddr);      // address
238    W.write<uint64_t>(SectionSize); // size
239  } else {
240    W.write<uint32_t>(VMAddr);      // address
241    W.write<uint32_t>(SectionSize); // size
242  }
243  W.write<uint32_t>(FileOffset);
244
245  assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
246  W.write<uint32_t>(Log2_32(Section.getAlignment()));
247  W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
248  W.write<uint32_t>(NumRelocations);
249  W.write<uint32_t>(Flags);
250  W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
251  W.write<uint32_t>(Section.getStubSize()); // reserved2
252  if (is64Bit())
253    W.write<uint32_t>(0); // reserved3
254
255  assert(W.OS.tell() - Start ==
256         (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
257}
258
259void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
260                                              uint32_t NumSymbols,
261                                              uint32_t StringTableOffset,
262                                              uint32_t StringTableSize) {
263  // struct symtab_command (24 bytes)
264
265  uint64_t Start = W.OS.tell();
266  (void) Start;
267
268  W.write<uint32_t>(MachO::LC_SYMTAB);
269  W.write<uint32_t>(sizeof(MachO::symtab_command));
270  W.write<uint32_t>(SymbolOffset);
271  W.write<uint32_t>(NumSymbols);
272  W.write<uint32_t>(StringTableOffset);
273  W.write<uint32_t>(StringTableSize);
274
275  assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
276}
277
278void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
279                                                uint32_t NumLocalSymbols,
280                                                uint32_t FirstExternalSymbol,
281                                                uint32_t NumExternalSymbols,
282                                                uint32_t FirstUndefinedSymbol,
283                                                uint32_t NumUndefinedSymbols,
284                                                uint32_t IndirectSymbolOffset,
285                                                uint32_t NumIndirectSymbols) {
286  // struct dysymtab_command (80 bytes)
287
288  uint64_t Start = W.OS.tell();
289  (void) Start;
290
291  W.write<uint32_t>(MachO::LC_DYSYMTAB);
292  W.write<uint32_t>(sizeof(MachO::dysymtab_command));
293  W.write<uint32_t>(FirstLocalSymbol);
294  W.write<uint32_t>(NumLocalSymbols);
295  W.write<uint32_t>(FirstExternalSymbol);
296  W.write<uint32_t>(NumExternalSymbols);
297  W.write<uint32_t>(FirstUndefinedSymbol);
298  W.write<uint32_t>(NumUndefinedSymbols);
299  W.write<uint32_t>(0); // tocoff
300  W.write<uint32_t>(0); // ntoc
301  W.write<uint32_t>(0); // modtaboff
302  W.write<uint32_t>(0); // nmodtab
303  W.write<uint32_t>(0); // extrefsymoff
304  W.write<uint32_t>(0); // nextrefsyms
305  W.write<uint32_t>(IndirectSymbolOffset);
306  W.write<uint32_t>(NumIndirectSymbols);
307  W.write<uint32_t>(0); // extreloff
308  W.write<uint32_t>(0); // nextrel
309  W.write<uint32_t>(0); // locreloff
310  W.write<uint32_t>(0); // nlocrel
311
312  assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
313}
314
315MachObjectWriter::MachSymbolData *
316MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
317  for (auto *SymbolData :
318       {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
319    for (MachSymbolData &Entry : *SymbolData)
320      if (Entry.Symbol == &Sym)
321        return &Entry;
322
323  return nullptr;
324}
325
326const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
327  const MCSymbol *S = &Sym;
328  while (S->isVariable()) {
329    const MCExpr *Value = S->getVariableValue();
330    const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
331    if (!Ref)
332      return *S;
333    S = &Ref->getSymbol();
334  }
335  return *S;
336}
337
338void MachObjectWriter::writeNlist(MachSymbolData &MSD,
339                                  const MCAsmLayout &Layout) {
340  const MCSymbol *Symbol = MSD.Symbol;
341  const MCSymbol &Data = *Symbol;
342  const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
343  uint8_t SectionIndex = MSD.SectionIndex;
344  uint8_t Type = 0;
345  uint64_t Address = 0;
346  bool IsAlias = Symbol != AliasedSymbol;
347
348  const MCSymbol &OrigSymbol = *Symbol;
349  MachSymbolData *AliaseeInfo;
350  if (IsAlias) {
351    AliaseeInfo = findSymbolData(*AliasedSymbol);
352    if (AliaseeInfo)
353      SectionIndex = AliaseeInfo->SectionIndex;
354    Symbol = AliasedSymbol;
355    // FIXME: Should this update Data as well?
356  }
357
358  // Set the N_TYPE bits. See <mach-o/nlist.h>.
359  //
360  // FIXME: Are the prebound or indirect fields possible here?
361  if (IsAlias && Symbol->isUndefined())
362    Type = MachO::N_INDR;
363  else if (Symbol->isUndefined())
364    Type = MachO::N_UNDF;
365  else if (Symbol->isAbsolute())
366    Type = MachO::N_ABS;
367  else
368    Type = MachO::N_SECT;
369
370  // FIXME: Set STAB bits.
371
372  if (Data.isPrivateExtern())
373    Type |= MachO::N_PEXT;
374
375  // Set external bit.
376  if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
377    Type |= MachO::N_EXT;
378
379  // Compute the symbol address.
380  if (IsAlias && Symbol->isUndefined())
381    Address = AliaseeInfo->StringIndex;
382  else if (Symbol->isDefined())
383    Address = getSymbolAddress(OrigSymbol, Layout);
384  else if (Symbol->isCommon()) {
385    // Common symbols are encoded with the size in the address
386    // field, and their alignment in the flags.
387    Address = Symbol->getCommonSize();
388  }
389
390  // struct nlist (12 bytes)
391
392  W.write<uint32_t>(MSD.StringIndex);
393  W.OS << char(Type);
394  W.OS << char(SectionIndex);
395
396  // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
397  // value.
398  bool EncodeAsAltEntry =
399    IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
400  W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
401  if (is64Bit())
402    W.write<uint64_t>(Address);
403  else
404    W.write<uint32_t>(Address);
405}
406
407void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
408                                                uint32_t DataOffset,
409                                                uint32_t DataSize) {
410  uint64_t Start = W.OS.tell();
411  (void) Start;
412
413  W.write<uint32_t>(Type);
414  W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
415  W.write<uint32_t>(DataOffset);
416  W.write<uint32_t>(DataSize);
417
418  assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
419}
420
421static unsigned ComputeLinkerOptionsLoadCommandSize(
422  const std::vector<std::string> &Options, bool is64Bit)
423{
424  unsigned Size = sizeof(MachO::linker_option_command);
425  for (const std::string &Option : Options)
426    Size += Option.size() + 1;
427  return alignTo(Size, is64Bit ? 8 : 4);
428}
429
430void MachObjectWriter::writeLinkerOptionsLoadCommand(
431  const std::vector<std::string> &Options)
432{
433  unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
434  uint64_t Start = W.OS.tell();
435  (void) Start;
436
437  W.write<uint32_t>(MachO::LC_LINKER_OPTION);
438  W.write<uint32_t>(Size);
439  W.write<uint32_t>(Options.size());
440  uint64_t BytesWritten = sizeof(MachO::linker_option_command);
441  for (const std::string &Option : Options) {
442    // Write each string, including the null byte.
443    W.OS << Option << '\0';
444    BytesWritten += Option.size() + 1;
445  }
446
447  // Pad to a multiple of the pointer size.
448  W.OS.write_zeros(
449      offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
450
451  assert(W.OS.tell() - Start == Size);
452}
453
454static bool isFixupTargetValid(const MCValue &Target) {
455  // Target is (LHS - RHS + cst).
456  // We don't support the form where LHS is null: -RHS + cst
457  if (!Target.getSymA() && Target.getSymB())
458    return false;
459  return true;
460}
461
462void MachObjectWriter::recordRelocation(MCAssembler &Asm,
463                                        const MCAsmLayout &Layout,
464                                        const MCFragment *Fragment,
465                                        const MCFixup &Fixup, MCValue Target,
466                                        uint64_t &FixedValue) {
467  if (!isFixupTargetValid(Target)) {
468    Asm.getContext().reportError(Fixup.getLoc(),
469                                 "unsupported relocation expression");
470    return;
471  }
472
473  TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
474                                       Target, FixedValue);
475}
476
477void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
478  // This is the point where 'as' creates actual symbols for indirect symbols
479  // (in the following two passes). It would be easier for us to do this sooner
480  // when we see the attribute, but that makes getting the order in the symbol
481  // table much more complicated than it is worth.
482  //
483  // FIXME: Revisit this when the dust settles.
484
485  // Report errors for use of .indirect_symbol not in a symbol pointer section
486  // or stub section.
487  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
488         ie = Asm.indirect_symbol_end(); it != ie; ++it) {
489    const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
490
491    if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
492        Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
493        Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
494        Section.getType() != MachO::S_SYMBOL_STUBS) {
495      MCSymbol &Symbol = *it->Symbol;
496      report_fatal_error("indirect symbol '" + Symbol.getName() +
497                         "' not in a symbol pointer or stub section");
498    }
499  }
500
501  // Bind non-lazy symbol pointers first.
502  unsigned IndirectIndex = 0;
503  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
504         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
505    const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
506
507    if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
508        Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
509      continue;
510
511    // Initialize the section indirect symbol base, if necessary.
512    IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
513
514    Asm.registerSymbol(*it->Symbol);
515  }
516
517  // Then lazy symbol pointers and symbol stubs.
518  IndirectIndex = 0;
519  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
520         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
521    const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
522
523    if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
524        Section.getType() != MachO::S_SYMBOL_STUBS)
525      continue;
526
527    // Initialize the section indirect symbol base, if necessary.
528    IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
529
530    // Set the symbol type to undefined lazy, but only on construction.
531    //
532    // FIXME: Do not hardcode.
533    bool Created;
534    Asm.registerSymbol(*it->Symbol, &Created);
535    if (Created)
536      cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
537  }
538}
539
540/// computeSymbolTable - Compute the symbol table data
541void MachObjectWriter::computeSymbolTable(
542    MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
543    std::vector<MachSymbolData> &ExternalSymbolData,
544    std::vector<MachSymbolData> &UndefinedSymbolData) {
545  // Build section lookup table.
546  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
547  unsigned Index = 1;
548  for (MCAssembler::iterator it = Asm.begin(),
549         ie = Asm.end(); it != ie; ++it, ++Index)
550    SectionIndexMap[&*it] = Index;
551  assert(Index <= 256 && "Too many sections!");
552
553  // Build the string table.
554  for (const MCSymbol &Symbol : Asm.symbols()) {
555    if (!Asm.isSymbolLinkerVisible(Symbol))
556      continue;
557
558    StringTable.add(Symbol.getName());
559  }
560  StringTable.finalize();
561
562  // Build the symbol arrays but only for non-local symbols.
563  //
564  // The particular order that we collect and then sort the symbols is chosen to
565  // match 'as'. Even though it doesn't matter for correctness, this is
566  // important for letting us diff .o files.
567  for (const MCSymbol &Symbol : Asm.symbols()) {
568    // Ignore non-linker visible symbols.
569    if (!Asm.isSymbolLinkerVisible(Symbol))
570      continue;
571
572    if (!Symbol.isExternal() && !Symbol.isUndefined())
573      continue;
574
575    MachSymbolData MSD;
576    MSD.Symbol = &Symbol;
577    MSD.StringIndex = StringTable.getOffset(Symbol.getName());
578
579    if (Symbol.isUndefined()) {
580      MSD.SectionIndex = 0;
581      UndefinedSymbolData.push_back(MSD);
582    } else if (Symbol.isAbsolute()) {
583      MSD.SectionIndex = 0;
584      ExternalSymbolData.push_back(MSD);
585    } else {
586      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
587      assert(MSD.SectionIndex && "Invalid section index!");
588      ExternalSymbolData.push_back(MSD);
589    }
590  }
591
592  // Now add the data for local symbols.
593  for (const MCSymbol &Symbol : Asm.symbols()) {
594    // Ignore non-linker visible symbols.
595    if (!Asm.isSymbolLinkerVisible(Symbol))
596      continue;
597
598    if (Symbol.isExternal() || Symbol.isUndefined())
599      continue;
600
601    MachSymbolData MSD;
602    MSD.Symbol = &Symbol;
603    MSD.StringIndex = StringTable.getOffset(Symbol.getName());
604
605    if (Symbol.isAbsolute()) {
606      MSD.SectionIndex = 0;
607      LocalSymbolData.push_back(MSD);
608    } else {
609      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
610      assert(MSD.SectionIndex && "Invalid section index!");
611      LocalSymbolData.push_back(MSD);
612    }
613  }
614
615  // External and undefined symbols are required to be in lexicographic order.
616  llvm::sort(ExternalSymbolData);
617  llvm::sort(UndefinedSymbolData);
618
619  // Set the symbol indices.
620  Index = 0;
621  for (auto *SymbolData :
622       {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
623    for (MachSymbolData &Entry : *SymbolData)
624      Entry.Symbol->setIndex(Index++);
625
626  for (const MCSection &Section : Asm) {
627    for (RelAndSymbol &Rel : Relocations[&Section]) {
628      if (!Rel.Sym)
629        continue;
630
631      // Set the Index and the IsExtern bit.
632      unsigned Index = Rel.Sym->getIndex();
633      assert(isInt<24>(Index));
634      if (W.Endian == support::little)
635        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
636      else
637        Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
638    }
639  }
640}
641
642void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
643                                               const MCAsmLayout &Layout) {
644  uint64_t StartAddress = 0;
645  for (const MCSection *Sec : Layout.getSectionOrder()) {
646    StartAddress = alignTo(StartAddress, Sec->getAlignment());
647    SectionAddress[Sec] = StartAddress;
648    StartAddress += Layout.getSectionAddressSize(Sec);
649
650    // Explicitly pad the section to match the alignment requirements of the
651    // following one. This is for 'gas' compatibility, it shouldn't
652    /// strictly be necessary.
653    StartAddress += getPaddingSize(Sec, Layout);
654  }
655}
656
657void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
658                                                const MCAsmLayout &Layout) {
659  computeSectionAddresses(Asm, Layout);
660
661  // Create symbol data for any indirect symbols.
662  bindIndirectSymbols(Asm);
663}
664
665bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
666    const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
667    bool InSet) const {
668  // FIXME: We don't handle things like
669  // foo = .
670  // creating atoms.
671  if (A.isVariable() || B.isVariable())
672    return false;
673  return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
674                                                                InSet);
675}
676
677bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
678    const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
679    bool InSet, bool IsPCRel) const {
680  if (InSet)
681    return true;
682
683  // The effective address is
684  //     addr(atom(A)) + offset(A)
685  //   - addr(atom(B)) - offset(B)
686  // and the offsets are not relocatable, so the fixup is fully resolved when
687  //  addr(atom(A)) - addr(atom(B)) == 0.
688  const MCSymbol &SA = findAliasedSymbol(SymA);
689  const MCSection &SecA = SA.getSection();
690  const MCSection &SecB = *FB.getParent();
691
692  if (IsPCRel) {
693    // The simple (Darwin, except on x86_64) way of dealing with this was to
694    // assume that any reference to a temporary symbol *must* be a temporary
695    // symbol in the same atom, unless the sections differ. Therefore, any PCrel
696    // relocation to a temporary symbol (in the same section) is fully
697    // resolved. This also works in conjunction with absolutized .set, which
698    // requires the compiler to use .set to absolutize the differences between
699    // symbols which the compiler knows to be assembly time constants, so we
700    // don't need to worry about considering symbol differences fully resolved.
701    //
702    // If the file isn't using sub-sections-via-symbols, we can make the
703    // same assumptions about any symbol that we normally make about
704    // assembler locals.
705
706    bool hasReliableSymbolDifference = isX86_64();
707    if (!hasReliableSymbolDifference) {
708      if (!SA.isInSection() || &SecA != &SecB ||
709          (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
710           Asm.getSubsectionsViaSymbols()))
711        return false;
712      return true;
713    }
714    // For Darwin x86_64, there is one special case when the reference IsPCRel.
715    // If the fragment with the reference does not have a base symbol but meets
716    // the simple way of dealing with this, in that it is a temporary symbol in
717    // the same atom then it is assumed to be fully resolved.  This is needed so
718    // a relocation entry is not created and so the static linker does not
719    // mess up the reference later.
720    else if(!FB.getAtom() &&
721            SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
722      return true;
723    }
724  }
725
726  // If they are not in the same section, we can't compute the diff.
727  if (&SecA != &SecB)
728    return false;
729
730  const MCFragment *FA = SA.getFragment();
731
732  // Bail if the symbol has no fragment.
733  if (!FA)
734    return false;
735
736  // If the atoms are the same, they are guaranteed to have the same address.
737  if (FA->getAtom() == FB.getAtom())
738    return true;
739
740  // Otherwise, we can't prove this is fully resolved.
741  return false;
742}
743
744static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
745  switch (Type) {
746  case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
747  case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
748  case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
749  case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
750  }
751  llvm_unreachable("Invalid mc version min type");
752}
753
754uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
755                                       const MCAsmLayout &Layout) {
756  uint64_t StartOffset = W.OS.tell();
757
758  // Compute symbol table information and bind symbol indices.
759  computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
760                     UndefinedSymbolData);
761
762  unsigned NumSections = Asm.size();
763  const MCAssembler::VersionInfoType &VersionInfo =
764    Layout.getAssembler().getVersionInfo();
765
766  // The section data starts after the header, the segment load command (and
767  // section headers) and the symbol table.
768  unsigned NumLoadCommands = 1;
769  uint64_t LoadCommandsSize = is64Bit() ?
770    sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
771    sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
772
773  // Add the deployment target version info load command size, if used.
774  if (VersionInfo.Major != 0) {
775    ++NumLoadCommands;
776    if (VersionInfo.EmitBuildVersion)
777      LoadCommandsSize += sizeof(MachO::build_version_command);
778    else
779      LoadCommandsSize += sizeof(MachO::version_min_command);
780  }
781
782  // Add the data-in-code load command size, if used.
783  unsigned NumDataRegions = Asm.getDataRegions().size();
784  if (NumDataRegions) {
785    ++NumLoadCommands;
786    LoadCommandsSize += sizeof(MachO::linkedit_data_command);
787  }
788
789  // Add the loh load command size, if used.
790  uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
791  uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
792  if (LOHSize) {
793    ++NumLoadCommands;
794    LoadCommandsSize += sizeof(MachO::linkedit_data_command);
795  }
796
797  // Add the symbol table load command sizes, if used.
798  unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
799    UndefinedSymbolData.size();
800  if (NumSymbols) {
801    NumLoadCommands += 2;
802    LoadCommandsSize += (sizeof(MachO::symtab_command) +
803                         sizeof(MachO::dysymtab_command));
804  }
805
806  // Add the linker option load commands sizes.
807  for (const auto &Option : Asm.getLinkerOptions()) {
808    ++NumLoadCommands;
809    LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
810  }
811
812  // Compute the total size of the section data, as well as its file size and vm
813  // size.
814  uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
815                               sizeof(MachO::mach_header)) + LoadCommandsSize;
816  uint64_t SectionDataSize = 0;
817  uint64_t SectionDataFileSize = 0;
818  uint64_t VMSize = 0;
819  for (const MCSection &Sec : Asm) {
820    uint64_t Address = getSectionAddress(&Sec);
821    uint64_t Size = Layout.getSectionAddressSize(&Sec);
822    uint64_t FileSize = Layout.getSectionFileSize(&Sec);
823    FileSize += getPaddingSize(&Sec, Layout);
824
825    VMSize = std::max(VMSize, Address + Size);
826
827    if (Sec.isVirtualSection())
828      continue;
829
830    SectionDataSize = std::max(SectionDataSize, Address + Size);
831    SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
832  }
833
834  // The section data is padded to 4 bytes.
835  //
836  // FIXME: Is this machine dependent?
837  unsigned SectionDataPadding =
838      offsetToAlignment(SectionDataFileSize, Align(4));
839  SectionDataFileSize += SectionDataPadding;
840
841  // Write the prolog, starting with the header and load command...
842  writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
843              Asm.getSubsectionsViaSymbols());
844  uint32_t Prot =
845      MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
846  writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
847                          SectionDataSize, Prot, Prot);
848
849  // ... and then the section headers.
850  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
851  for (const MCSection &Section : Asm) {
852    const auto &Sec = cast<MCSectionMachO>(Section);
853    std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
854    unsigned NumRelocs = Relocs.size();
855    uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
856    unsigned Flags = Sec.getTypeAndAttributes();
857    if (Sec.hasInstructions())
858      Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
859    writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
860                 RelocTableEnd, NumRelocs);
861    RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
862  }
863
864  // Write out the deployment target information, if it's available.
865  if (VersionInfo.Major != 0) {
866    auto EncodeVersion = [](VersionTuple V) -> uint32_t {
867      assert(!V.empty() && "empty version");
868      unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
869      unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
870      assert(Update < 256 && "unencodable update target version");
871      assert(Minor < 256 && "unencodable minor target version");
872      assert(V.getMajor() < 65536 && "unencodable major target version");
873      return Update | (Minor << 8) | (V.getMajor() << 16);
874    };
875    uint32_t EncodedVersion = EncodeVersion(
876        VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
877    uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
878                              ? EncodeVersion(VersionInfo.SDKVersion)
879                              : 0;
880    if (VersionInfo.EmitBuildVersion) {
881      // FIXME: Currently empty tools. Add clang version in the future.
882      W.write<uint32_t>(MachO::LC_BUILD_VERSION);
883      W.write<uint32_t>(sizeof(MachO::build_version_command));
884      W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
885      W.write<uint32_t>(EncodedVersion);
886      W.write<uint32_t>(SDKVersion);
887      W.write<uint32_t>(0);         // Empty tools list.
888    } else {
889      MachO::LoadCommandType LCType
890        = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
891      W.write<uint32_t>(LCType);
892      W.write<uint32_t>(sizeof(MachO::version_min_command));
893      W.write<uint32_t>(EncodedVersion);
894      W.write<uint32_t>(SDKVersion);
895    }
896  }
897
898  // Write the data-in-code load command, if used.
899  uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
900  if (NumDataRegions) {
901    uint64_t DataRegionsOffset = RelocTableEnd;
902    uint64_t DataRegionsSize = NumDataRegions * 8;
903    writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
904                             DataRegionsSize);
905  }
906
907  // Write the loh load command, if used.
908  uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
909  if (LOHSize)
910    writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
911                             DataInCodeTableEnd, LOHSize);
912
913  // Write the symbol table load command, if used.
914  if (NumSymbols) {
915    unsigned FirstLocalSymbol = 0;
916    unsigned NumLocalSymbols = LocalSymbolData.size();
917    unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
918    unsigned NumExternalSymbols = ExternalSymbolData.size();
919    unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
920    unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
921    unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
922    unsigned NumSymTabSymbols =
923      NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
924    uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
925    uint64_t IndirectSymbolOffset = 0;
926
927    // If used, the indirect symbols are written after the section data.
928    if (NumIndirectSymbols)
929      IndirectSymbolOffset = LOHTableEnd;
930
931    // The symbol table is written after the indirect symbol data.
932    uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
933
934    // The string table is written after symbol table.
935    uint64_t StringTableOffset =
936      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
937                                              sizeof(MachO::nlist_64) :
938                                              sizeof(MachO::nlist));
939    writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
940                           StringTableOffset, StringTable.getSize());
941
942    writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
943                             FirstExternalSymbol, NumExternalSymbols,
944                             FirstUndefinedSymbol, NumUndefinedSymbols,
945                             IndirectSymbolOffset, NumIndirectSymbols);
946  }
947
948  // Write the linker options load commands.
949  for (const auto &Option : Asm.getLinkerOptions())
950    writeLinkerOptionsLoadCommand(Option);
951
952  // Write the actual section data.
953  for (const MCSection &Sec : Asm) {
954    Asm.writeSectionData(W.OS, &Sec, Layout);
955
956    uint64_t Pad = getPaddingSize(&Sec, Layout);
957    W.OS.write_zeros(Pad);
958  }
959
960  // Write the extra padding.
961  W.OS.write_zeros(SectionDataPadding);
962
963  // Write the relocation entries.
964  for (const MCSection &Sec : Asm) {
965    // Write the section relocation entries, in reverse order to match 'as'
966    // (approximately, the exact algorithm is more complicated than this).
967    std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
968    for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
969      W.write<uint32_t>(Rel.MRE.r_word0);
970      W.write<uint32_t>(Rel.MRE.r_word1);
971    }
972  }
973
974  // Write out the data-in-code region payload, if there is one.
975  for (MCAssembler::const_data_region_iterator
976         it = Asm.data_region_begin(), ie = Asm.data_region_end();
977         it != ie; ++it) {
978    const DataRegionData *Data = &(*it);
979    uint64_t Start = getSymbolAddress(*Data->Start, Layout);
980    uint64_t End;
981    if (Data->End)
982      End = getSymbolAddress(*Data->End, Layout);
983    else
984      report_fatal_error("Data region not terminated");
985
986    LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
987                      << "  start: " << Start << "(" << Data->Start->getName()
988                      << ")"
989                      << "  end: " << End << "(" << Data->End->getName() << ")"
990                      << "  size: " << End - Start << "\n");
991    W.write<uint32_t>(Start);
992    W.write<uint16_t>(End - Start);
993    W.write<uint16_t>(Data->Kind);
994  }
995
996  // Write out the loh commands, if there is one.
997  if (LOHSize) {
998#ifndef NDEBUG
999    unsigned Start = W.OS.tell();
1000#endif
1001    Asm.getLOHContainer().emit(*this, Layout);
1002    // Pad to a multiple of the pointer size.
1003    W.OS.write_zeros(
1004        offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1005    assert(W.OS.tell() - Start == LOHSize);
1006  }
1007
1008  // Write the symbol table data, if used.
1009  if (NumSymbols) {
1010    // Write the indirect symbol entries.
1011    for (MCAssembler::const_indirect_symbol_iterator
1012           it = Asm.indirect_symbol_begin(),
1013           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1014      // Indirect symbols in the non-lazy symbol pointer section have some
1015      // special handling.
1016      const MCSectionMachO &Section =
1017          static_cast<const MCSectionMachO &>(*it->Section);
1018      if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1019        // If this symbol is defined and internal, mark it as such.
1020        if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1021          uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1022          if (it->Symbol->isAbsolute())
1023            Flags |= MachO::INDIRECT_SYMBOL_ABS;
1024          W.write<uint32_t>(Flags);
1025          continue;
1026        }
1027      }
1028
1029      W.write<uint32_t>(it->Symbol->getIndex());
1030    }
1031
1032    // FIXME: Check that offsets match computed ones.
1033
1034    // Write the symbol table entries.
1035    for (auto *SymbolData :
1036         {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1037      for (MachSymbolData &Entry : *SymbolData)
1038        writeNlist(Entry, Layout);
1039
1040    // Write the string table.
1041    StringTable.write(W.OS);
1042  }
1043
1044  return W.OS.tell() - StartOffset;
1045}
1046
1047std::unique_ptr<MCObjectWriter>
1048llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1049                             raw_pwrite_stream &OS, bool IsLittleEndian) {
1050  return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1051                                             IsLittleEndian);
1052}
1053