MachOWriter.cpp revision 360784
1//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "MachOWriter.h" 10#include "MachOLayoutBuilder.h" 11#include "Object.h" 12#include "llvm/ADT/STLExtras.h" 13#include "llvm/BinaryFormat/MachO.h" 14#include "llvm/Object/MachO.h" 15#include "llvm/Support/Errc.h" 16#include "llvm/Support/ErrorHandling.h" 17#include <memory> 18 19namespace llvm { 20namespace objcopy { 21namespace macho { 22 23size_t MachOWriter::headerSize() const { 24 return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 25} 26 27size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; } 28 29size_t MachOWriter::symTableSize() const { 30 return O.SymTable.Symbols.size() * 31 (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); 32} 33 34size_t MachOWriter::totalSize() const { 35 // Going from tail to head and looking for an appropriate "anchor" to 36 // calculate the total size assuming that all the offsets are either valid 37 // ("true") or 0 (0 indicates that the corresponding part is missing). 38 39 SmallVector<size_t, 7> Ends; 40 if (O.SymTabCommandIndex) { 41 const MachO::symtab_command &SymTabCommand = 42 O.LoadCommands[*O.SymTabCommandIndex] 43 .MachOLoadCommand.symtab_command_data; 44 if (SymTabCommand.symoff) 45 Ends.push_back(SymTabCommand.symoff + symTableSize()); 46 if (SymTabCommand.stroff) 47 Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); 48 } 49 if (O.DyLdInfoCommandIndex) { 50 const MachO::dyld_info_command &DyLdInfoCommand = 51 O.LoadCommands[*O.DyLdInfoCommandIndex] 52 .MachOLoadCommand.dyld_info_command_data; 53 if (DyLdInfoCommand.rebase_off) { 54 assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && 55 "Incorrect rebase opcodes size"); 56 Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size); 57 } 58 if (DyLdInfoCommand.bind_off) { 59 assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && 60 "Incorrect bind opcodes size"); 61 Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size); 62 } 63 if (DyLdInfoCommand.weak_bind_off) { 64 assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && 65 "Incorrect weak bind opcodes size"); 66 Ends.push_back(DyLdInfoCommand.weak_bind_off + 67 DyLdInfoCommand.weak_bind_size); 68 } 69 if (DyLdInfoCommand.lazy_bind_off) { 70 assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && 71 "Incorrect lazy bind opcodes size"); 72 Ends.push_back(DyLdInfoCommand.lazy_bind_off + 73 DyLdInfoCommand.lazy_bind_size); 74 } 75 if (DyLdInfoCommand.export_off) { 76 assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && 77 "Incorrect trie size"); 78 Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size); 79 } 80 } 81 82 if (O.DySymTabCommandIndex) { 83 const MachO::dysymtab_command &DySymTabCommand = 84 O.LoadCommands[*O.DySymTabCommandIndex] 85 .MachOLoadCommand.dysymtab_command_data; 86 87 if (DySymTabCommand.indirectsymoff) 88 Ends.push_back(DySymTabCommand.indirectsymoff + 89 sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); 90 } 91 92 if (O.DataInCodeCommandIndex) { 93 const MachO::linkedit_data_command &LinkEditDataCommand = 94 O.LoadCommands[*O.DataInCodeCommandIndex] 95 .MachOLoadCommand.linkedit_data_command_data; 96 97 if (LinkEditDataCommand.dataoff) 98 Ends.push_back(LinkEditDataCommand.dataoff + 99 LinkEditDataCommand.datasize); 100 } 101 102 if (O.FunctionStartsCommandIndex) { 103 const MachO::linkedit_data_command &LinkEditDataCommand = 104 O.LoadCommands[*O.FunctionStartsCommandIndex] 105 .MachOLoadCommand.linkedit_data_command_data; 106 107 if (LinkEditDataCommand.dataoff) 108 Ends.push_back(LinkEditDataCommand.dataoff + 109 LinkEditDataCommand.datasize); 110 } 111 112 // Otherwise, use the last section / reloction. 113 for (const auto &LC : O.LoadCommands) 114 for (const auto &S : LC.Sections) { 115 Ends.push_back(S.Offset + S.Size); 116 if (S.RelOff) 117 Ends.push_back(S.RelOff + 118 S.NReloc * sizeof(MachO::any_relocation_info)); 119 } 120 121 if (!Ends.empty()) 122 return *std::max_element(Ends.begin(), Ends.end()); 123 124 // Otherwise, we have only Mach header and load commands. 125 return headerSize() + loadCommandsSize(); 126} 127 128void MachOWriter::writeHeader() { 129 MachO::mach_header_64 Header; 130 131 Header.magic = O.Header.Magic; 132 Header.cputype = O.Header.CPUType; 133 Header.cpusubtype = O.Header.CPUSubType; 134 Header.filetype = O.Header.FileType; 135 Header.ncmds = O.Header.NCmds; 136 Header.sizeofcmds = O.Header.SizeOfCmds; 137 Header.flags = O.Header.Flags; 138 Header.reserved = O.Header.Reserved; 139 140 if (IsLittleEndian != sys::IsLittleEndianHost) 141 MachO::swapStruct(Header); 142 143 auto HeaderSize = 144 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 145 memcpy(B.getBufferStart(), &Header, HeaderSize); 146} 147 148void MachOWriter::writeLoadCommands() { 149 uint8_t *Begin = B.getBufferStart() + headerSize(); 150 for (const auto &LC : O.LoadCommands) { 151 // Construct a load command. 152 MachO::macho_load_command MLC = LC.MachOLoadCommand; 153 switch (MLC.load_command_data.cmd) { 154 case MachO::LC_SEGMENT: 155 if (IsLittleEndian != sys::IsLittleEndianHost) 156 MachO::swapStruct(MLC.segment_command_data); 157 memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); 158 Begin += sizeof(MachO::segment_command); 159 160 for (const auto &Sec : LC.Sections) 161 writeSectionInLoadCommand<MachO::section>(Sec, Begin); 162 continue; 163 case MachO::LC_SEGMENT_64: 164 if (IsLittleEndian != sys::IsLittleEndianHost) 165 MachO::swapStruct(MLC.segment_command_64_data); 166 memcpy(Begin, &MLC.segment_command_64_data, 167 sizeof(MachO::segment_command_64)); 168 Begin += sizeof(MachO::segment_command_64); 169 170 for (const auto &Sec : LC.Sections) 171 writeSectionInLoadCommand<MachO::section_64>(Sec, Begin); 172 continue; 173 } 174 175#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 176 case MachO::LCName: \ 177 assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ 178 MLC.load_command_data.cmdsize); \ 179 if (IsLittleEndian != sys::IsLittleEndianHost) \ 180 MachO::swapStruct(MLC.LCStruct##_data); \ 181 memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ 182 Begin += sizeof(MachO::LCStruct); \ 183 if (!LC.Payload.empty()) \ 184 memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ 185 Begin += LC.Payload.size(); \ 186 break; 187 188 // Copy the load command as it is. 189 switch (MLC.load_command_data.cmd) { 190 default: 191 assert(sizeof(MachO::load_command) + LC.Payload.size() == 192 MLC.load_command_data.cmdsize); 193 if (IsLittleEndian != sys::IsLittleEndianHost) 194 MachO::swapStruct(MLC.load_command_data); 195 memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); 196 Begin += sizeof(MachO::load_command); 197 if (!LC.Payload.empty()) 198 memcpy(Begin, LC.Payload.data(), LC.Payload.size()); 199 Begin += LC.Payload.size(); 200 break; 201#include "llvm/BinaryFormat/MachO.def" 202 } 203 } 204} 205 206template <typename StructType> 207void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { 208 StructType Temp; 209 assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name"); 210 assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && 211 "too long section name"); 212 memset(&Temp, 0, sizeof(StructType)); 213 memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); 214 memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); 215 Temp.addr = Sec.Addr; 216 Temp.size = Sec.Size; 217 Temp.offset = Sec.Offset; 218 Temp.align = Sec.Align; 219 Temp.reloff = Sec.RelOff; 220 Temp.nreloc = Sec.NReloc; 221 Temp.flags = Sec.Flags; 222 Temp.reserved1 = Sec.Reserved1; 223 Temp.reserved2 = Sec.Reserved2; 224 225 if (IsLittleEndian != sys::IsLittleEndianHost) 226 MachO::swapStruct(Temp); 227 memcpy(Out, &Temp, sizeof(StructType)); 228 Out += sizeof(StructType); 229} 230 231void MachOWriter::writeSections() { 232 for (const auto &LC : O.LoadCommands) 233 for (const auto &Sec : LC.Sections) { 234 if (Sec.isVirtualSection()) 235 continue; 236 237 assert(Sec.Offset && "Section offset can not be zero"); 238 assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); 239 memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), 240 Sec.Content.size()); 241 for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { 242 auto RelocInfo = Sec.Relocations[Index]; 243 if (!RelocInfo.Scattered) { 244 auto *Info = 245 reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info); 246 Info->r_symbolnum = RelocInfo.Symbol->Index; 247 } 248 249 if (IsLittleEndian != sys::IsLittleEndianHost) 250 MachO::swapStruct( 251 reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); 252 memcpy(B.getBufferStart() + Sec.RelOff + 253 Index * sizeof(MachO::any_relocation_info), 254 &RelocInfo.Info, sizeof(RelocInfo.Info)); 255 } 256 } 257} 258 259template <typename NListType> 260void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, 261 uint32_t Nstrx) { 262 NListType ListEntry; 263 ListEntry.n_strx = Nstrx; 264 ListEntry.n_type = SE.n_type; 265 ListEntry.n_sect = SE.n_sect; 266 ListEntry.n_desc = SE.n_desc; 267 ListEntry.n_value = SE.n_value; 268 269 if (IsLittleEndian != sys::IsLittleEndianHost) 270 MachO::swapStruct(ListEntry); 271 memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType)); 272 Out += sizeof(NListType); 273} 274 275void MachOWriter::writeStringTable() { 276 if (!O.SymTabCommandIndex) 277 return; 278 const MachO::symtab_command &SymTabCommand = 279 O.LoadCommands[*O.SymTabCommandIndex] 280 .MachOLoadCommand.symtab_command_data; 281 282 uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff; 283 LayoutBuilder.getStringTableBuilder().write(StrTable); 284} 285 286void MachOWriter::writeSymbolTable() { 287 if (!O.SymTabCommandIndex) 288 return; 289 const MachO::symtab_command &SymTabCommand = 290 O.LoadCommands[*O.SymTabCommandIndex] 291 .MachOLoadCommand.symtab_command_data; 292 293 char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff; 294 for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); 295 Iter != End; Iter++) { 296 SymbolEntry *Sym = Iter->get(); 297 uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); 298 299 if (Is64Bit) 300 writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx); 301 else 302 writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx); 303 } 304} 305 306void MachOWriter::writeRebaseInfo() { 307 if (!O.DyLdInfoCommandIndex) 308 return; 309 const MachO::dyld_info_command &DyLdInfoCommand = 310 O.LoadCommands[*O.DyLdInfoCommandIndex] 311 .MachOLoadCommand.dyld_info_command_data; 312 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off; 313 assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && 314 "Incorrect rebase opcodes size"); 315 memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); 316} 317 318void MachOWriter::writeBindInfo() { 319 if (!O.DyLdInfoCommandIndex) 320 return; 321 const MachO::dyld_info_command &DyLdInfoCommand = 322 O.LoadCommands[*O.DyLdInfoCommandIndex] 323 .MachOLoadCommand.dyld_info_command_data; 324 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off; 325 assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && 326 "Incorrect bind opcodes size"); 327 memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); 328} 329 330void MachOWriter::writeWeakBindInfo() { 331 if (!O.DyLdInfoCommandIndex) 332 return; 333 const MachO::dyld_info_command &DyLdInfoCommand = 334 O.LoadCommands[*O.DyLdInfoCommandIndex] 335 .MachOLoadCommand.dyld_info_command_data; 336 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off; 337 assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && 338 "Incorrect weak bind opcodes size"); 339 memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); 340} 341 342void MachOWriter::writeLazyBindInfo() { 343 if (!O.DyLdInfoCommandIndex) 344 return; 345 const MachO::dyld_info_command &DyLdInfoCommand = 346 O.LoadCommands[*O.DyLdInfoCommandIndex] 347 .MachOLoadCommand.dyld_info_command_data; 348 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off; 349 assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && 350 "Incorrect lazy bind opcodes size"); 351 memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); 352} 353 354void MachOWriter::writeExportInfo() { 355 if (!O.DyLdInfoCommandIndex) 356 return; 357 const MachO::dyld_info_command &DyLdInfoCommand = 358 O.LoadCommands[*O.DyLdInfoCommandIndex] 359 .MachOLoadCommand.dyld_info_command_data; 360 char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off; 361 assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && 362 "Incorrect export trie size"); 363 memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); 364} 365 366void MachOWriter::writeIndirectSymbolTable() { 367 if (!O.DySymTabCommandIndex) 368 return; 369 370 const MachO::dysymtab_command &DySymTabCommand = 371 O.LoadCommands[*O.DySymTabCommandIndex] 372 .MachOLoadCommand.dysymtab_command_data; 373 374 uint32_t *Out = 375 (uint32_t *)(B.getBufferStart() + DySymTabCommand.indirectsymoff); 376 for (const IndirectSymbolEntry &Sym : O.IndirectSymTable.Symbols) { 377 uint32_t Entry = (Sym.Symbol) ? (*Sym.Symbol)->Index : Sym.OriginalIndex; 378 if (IsLittleEndian != sys::IsLittleEndianHost) 379 sys::swapByteOrder(Entry); 380 *Out++ = Entry; 381 } 382} 383 384void MachOWriter::writeDataInCodeData() { 385 if (!O.DataInCodeCommandIndex) 386 return; 387 const MachO::linkedit_data_command &LinkEditDataCommand = 388 O.LoadCommands[*O.DataInCodeCommandIndex] 389 .MachOLoadCommand.linkedit_data_command_data; 390 char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; 391 assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && 392 "Incorrect data in code data size"); 393 memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); 394} 395 396void MachOWriter::writeFunctionStartsData() { 397 if (!O.FunctionStartsCommandIndex) 398 return; 399 const MachO::linkedit_data_command &LinkEditDataCommand = 400 O.LoadCommands[*O.FunctionStartsCommandIndex] 401 .MachOLoadCommand.linkedit_data_command_data; 402 char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; 403 assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && 404 "Incorrect function starts data size"); 405 memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); 406} 407 408void MachOWriter::writeTail() { 409 typedef void (MachOWriter::*WriteHandlerType)(void); 410 typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; 411 SmallVector<WriteOperation, 7> Queue; 412 413 if (O.SymTabCommandIndex) { 414 const MachO::symtab_command &SymTabCommand = 415 O.LoadCommands[*O.SymTabCommandIndex] 416 .MachOLoadCommand.symtab_command_data; 417 if (SymTabCommand.symoff) 418 Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); 419 if (SymTabCommand.stroff) 420 Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable}); 421 } 422 423 if (O.DyLdInfoCommandIndex) { 424 const MachO::dyld_info_command &DyLdInfoCommand = 425 O.LoadCommands[*O.DyLdInfoCommandIndex] 426 .MachOLoadCommand.dyld_info_command_data; 427 if (DyLdInfoCommand.rebase_off) 428 Queue.push_back( 429 {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); 430 if (DyLdInfoCommand.bind_off) 431 Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); 432 if (DyLdInfoCommand.weak_bind_off) 433 Queue.push_back( 434 {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); 435 if (DyLdInfoCommand.lazy_bind_off) 436 Queue.push_back( 437 {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); 438 if (DyLdInfoCommand.export_off) 439 Queue.push_back( 440 {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); 441 } 442 443 if (O.DySymTabCommandIndex) { 444 const MachO::dysymtab_command &DySymTabCommand = 445 O.LoadCommands[*O.DySymTabCommandIndex] 446 .MachOLoadCommand.dysymtab_command_data; 447 448 if (DySymTabCommand.indirectsymoff) 449 Queue.emplace_back(DySymTabCommand.indirectsymoff, 450 &MachOWriter::writeIndirectSymbolTable); 451 } 452 453 if (O.DataInCodeCommandIndex) { 454 const MachO::linkedit_data_command &LinkEditDataCommand = 455 O.LoadCommands[*O.DataInCodeCommandIndex] 456 .MachOLoadCommand.linkedit_data_command_data; 457 458 if (LinkEditDataCommand.dataoff) 459 Queue.emplace_back(LinkEditDataCommand.dataoff, 460 &MachOWriter::writeDataInCodeData); 461 } 462 463 if (O.FunctionStartsCommandIndex) { 464 const MachO::linkedit_data_command &LinkEditDataCommand = 465 O.LoadCommands[*O.FunctionStartsCommandIndex] 466 .MachOLoadCommand.linkedit_data_command_data; 467 468 if (LinkEditDataCommand.dataoff) 469 Queue.emplace_back(LinkEditDataCommand.dataoff, 470 &MachOWriter::writeFunctionStartsData); 471 } 472 473 llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { 474 return LHS.first < RHS.first; 475 }); 476 477 for (auto WriteOp : Queue) 478 (this->*WriteOp.second)(); 479} 480 481Error MachOWriter::finalize() { return LayoutBuilder.layout(); } 482 483Error MachOWriter::write() { 484 if (Error E = B.allocate(totalSize())) 485 return E; 486 memset(B.getBufferStart(), 0, totalSize()); 487 writeHeader(); 488 writeLoadCommands(); 489 writeSections(); 490 writeTail(); 491 return B.commit(); 492} 493 494} // end namespace macho 495} // end namespace objcopy 496} // end namespace llvm 497