1//===- InputSection.h -------------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef LLD_MACHO_INPUT_SECTION_H 10#define LLD_MACHO_INPUT_SECTION_H 11 12#include "Config.h" 13#include "Relocations.h" 14#include "Symbols.h" 15 16#include "lld/Common/LLVM.h" 17#include "lld/Common/Memory.h" 18#include "llvm/ADT/ArrayRef.h" 19#include "llvm/ADT/BitVector.h" 20#include "llvm/ADT/CachedHashString.h" 21#include "llvm/ADT/TinyPtrVector.h" 22#include "llvm/BinaryFormat/MachO.h" 23 24namespace lld { 25namespace macho { 26 27class InputFile; 28class OutputSection; 29 30class InputSection { 31public: 32 enum Kind : uint8_t { 33 ConcatKind, 34 CStringLiteralKind, 35 WordLiteralKind, 36 }; 37 38 Kind kind() const { return sectionKind; } 39 virtual ~InputSection() = default; 40 virtual uint64_t getSize() const { return data.size(); } 41 virtual bool empty() const { return data.empty(); } 42 InputFile *getFile() const { return section.file; } 43 StringRef getName() const { return section.name; } 44 StringRef getSegName() const { return section.segname; } 45 uint32_t getFlags() const { return section.flags; } 46 uint64_t getFileSize() const; 47 // Translates \p off -- an offset relative to this InputSection -- into an 48 // offset from the beginning of its parent OutputSection. 49 virtual uint64_t getOffset(uint64_t off) const = 0; 50 // The offset from the beginning of the file. 51 uint64_t getVA(uint64_t off) const; 52 // Return a user-friendly string for use in diagnostics. 53 // Format: /path/to/object.o:(symbol _func+0x123) 54 std::string getLocation(uint64_t off) const; 55 // Return the source line corresponding to an address, or the empty string. 56 // Format: Source.cpp:123 (/path/to/Source.cpp:123) 57 std::string getSourceLocation(uint64_t off) const; 58 // Return the relocation at \p off, if it exists. This does a linear search. 59 const Reloc *getRelocAt(uint32_t off) const; 60 // Whether the data at \p off in this InputSection is live. 61 virtual bool isLive(uint64_t off) const = 0; 62 virtual void markLive(uint64_t off) = 0; 63 virtual InputSection *canonical() { return this; } 64 virtual const InputSection *canonical() const { return this; } 65 66protected: 67 InputSection(Kind kind, const Section §ion, ArrayRef<uint8_t> data, 68 uint32_t align) 69 : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align), 70 data(data), section(section) {} 71 72 InputSection(const InputSection &rhs) 73 : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false), 74 align(rhs.align), data(rhs.data), section(rhs.section) {} 75 76 Kind sectionKind; 77 78public: 79 // is address assigned? 80 bool isFinal = false; 81 // keep the address of the symbol(s) in this section unique in the final 82 // binary ? 83 bool keepUnique : 1; 84 // Does this section have symbols at offsets other than zero? (NOTE: only 85 // applies to ConcatInputSections.) 86 bool hasAltEntry : 1; 87 uint32_t align = 1; 88 89 OutputSection *parent = nullptr; 90 ArrayRef<uint8_t> data; 91 std::vector<Reloc> relocs; 92 // The symbols that belong to this InputSection, sorted by value. With 93 // .subsections_via_symbols, there is typically only one element here. 94 llvm::TinyPtrVector<Defined *> symbols; 95 96protected: 97 const Section §ion; 98 99 const Defined *getContainingSymbol(uint64_t off) const; 100}; 101 102// ConcatInputSections are combined into (Concat)OutputSections through simple 103// concatenation, in contrast with literal sections which may have their 104// contents merged before output. 105class ConcatInputSection final : public InputSection { 106public: 107 ConcatInputSection(const Section §ion, ArrayRef<uint8_t> data, 108 uint32_t align = 1) 109 : InputSection(ConcatKind, section, data, align) {} 110 111 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } 112 uint64_t getVA() const { return InputSection::getVA(0); } 113 // ConcatInputSections are entirely live or dead, so the offset is irrelevant. 114 bool isLive(uint64_t off) const override { return live; } 115 void markLive(uint64_t off) override { live = true; } 116 bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } 117 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } 118 void writeTo(uint8_t *buf); 119 120 void foldIdentical(ConcatInputSection *redundant); 121 ConcatInputSection *canonical() override { 122 return replacement ? replacement : this; 123 } 124 const InputSection *canonical() const override { 125 return replacement ? replacement : this; 126 } 127 128 static bool classof(const InputSection *isec) { 129 return isec->kind() == ConcatKind; 130 } 131 132 // Points to the surviving section after this one is folded by ICF 133 ConcatInputSection *replacement = nullptr; 134 // Equivalence-class ID for ICF 135 uint32_t icfEqClass[2] = {0, 0}; 136 137 // With subsections_via_symbols, most symbols have their own InputSection, 138 // and for weak symbols (e.g. from inline functions), only the 139 // InputSection from one translation unit will make it to the output, 140 // while all copies in other translation units are coalesced into the 141 // first and not copied to the output. 142 bool wasCoalesced = false; 143 bool live = !config->deadStrip; 144 bool hasCallSites = false; 145 // This variable has two usages. Initially, it represents the input order. 146 // After assignAddresses is called, it represents the offset from the 147 // beginning of the output section this section was assigned to. 148 uint64_t outSecOff = 0; 149}; 150 151// Initialize a fake InputSection that does not belong to any InputFile. 152ConcatInputSection *makeSyntheticInputSection(StringRef segName, 153 StringRef sectName, 154 uint32_t flags = 0, 155 ArrayRef<uint8_t> data = {}, 156 uint32_t align = 1); 157 158// Helper functions to make it easy to sprinkle asserts. 159 160inline bool shouldOmitFromOutput(InputSection *isec) { 161 return isa<ConcatInputSection>(isec) && 162 cast<ConcatInputSection>(isec)->shouldOmitFromOutput(); 163} 164 165inline bool isCoalescedWeak(InputSection *isec) { 166 return isa<ConcatInputSection>(isec) && 167 cast<ConcatInputSection>(isec)->isCoalescedWeak(); 168} 169 170// We allocate a lot of these and binary search on them, so they should be as 171// compact as possible. Hence the use of 31 rather than 64 bits for the hash. 172struct StringPiece { 173 // Offset from the start of the containing input section. 174 uint32_t inSecOff; 175 uint32_t live : 1; 176 // Only set if deduplicating literals 177 uint32_t hash : 31; 178 // Offset from the start of the containing output section. 179 uint64_t outSecOff = 0; 180 181 StringPiece(uint64_t off, uint32_t hash) 182 : inSecOff(off), live(!config->deadStrip), hash(hash) {} 183}; 184 185static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!"); 186 187// CStringInputSections are composed of multiple null-terminated string 188// literals, which we represent using StringPieces. These literals can be 189// deduplicated and tail-merged, so translating offsets between the input and 190// outputs sections is more complicated. 191// 192// NOTE: One significant difference between LLD and ld64 is that we merge all 193// cstring literals, even those referenced directly by non-private symbols. 194// ld64 is more conservative and does not do that. This was mostly done for 195// implementation simplicity; if we find programs that need the more 196// conservative behavior we can certainly implement that. 197class CStringInputSection final : public InputSection { 198public: 199 CStringInputSection(const Section §ion, ArrayRef<uint8_t> data, 200 uint32_t align, bool dedupLiterals) 201 : InputSection(CStringLiteralKind, section, data, align), 202 deduplicateLiterals(dedupLiterals) {} 203 204 uint64_t getOffset(uint64_t off) const override; 205 bool isLive(uint64_t off) const override { return getStringPiece(off).live; } 206 void markLive(uint64_t off) override { getStringPiece(off).live = true; } 207 // Find the StringPiece that contains this offset. 208 StringPiece &getStringPiece(uint64_t off); 209 const StringPiece &getStringPiece(uint64_t off) const; 210 // Split at each null byte. 211 void splitIntoPieces(); 212 213 LLVM_ATTRIBUTE_ALWAYS_INLINE 214 StringRef getStringRef(size_t i) const { 215 size_t begin = pieces[i].inSecOff; 216 // The endpoint should be *at* the null terminator, not after. This matches 217 // the behavior of StringRef(const char *Str). 218 size_t end = 219 ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1; 220 return toStringRef(data.slice(begin, end - begin)); 221 } 222 223 StringRef getStringRefAtOffset(uint64_t off) const { 224 return getStringRef(getStringPieceIndex(off)); 225 } 226 227 // Returns i'th piece as a CachedHashStringRef. This function is very hot when 228 // string merging is enabled, so we want to inline. 229 LLVM_ATTRIBUTE_ALWAYS_INLINE 230 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { 231 assert(deduplicateLiterals); 232 return {getStringRef(i), pieces[i].hash}; 233 } 234 235 static bool classof(const InputSection *isec) { 236 return isec->kind() == CStringLiteralKind; 237 } 238 239 bool deduplicateLiterals = false; 240 std::vector<StringPiece> pieces; 241 242private: 243 size_t getStringPieceIndex(uint64_t off) const; 244}; 245 246class WordLiteralInputSection final : public InputSection { 247public: 248 WordLiteralInputSection(const Section §ion, ArrayRef<uint8_t> data, 249 uint32_t align); 250 uint64_t getOffset(uint64_t off) const override; 251 bool isLive(uint64_t off) const override { 252 return live[off >> power2LiteralSize]; 253 } 254 void markLive(uint64_t off) override { 255 live[off >> power2LiteralSize] = true; 256 } 257 258 static bool classof(const InputSection *isec) { 259 return isec->kind() == WordLiteralKind; 260 } 261 262private: 263 unsigned power2LiteralSize; 264 // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. 265 llvm::BitVector live; 266}; 267 268inline uint8_t sectionType(uint32_t flags) { 269 return flags & llvm::MachO::SECTION_TYPE; 270} 271 272inline bool isZeroFill(uint32_t flags) { 273 return llvm::MachO::isVirtualSection(sectionType(flags)); 274} 275 276inline bool isThreadLocalVariables(uint32_t flags) { 277 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; 278} 279 280// These sections contain the data for initializing thread-local variables. 281inline bool isThreadLocalData(uint32_t flags) { 282 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || 283 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; 284} 285 286inline bool isDebugSection(uint32_t flags) { 287 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == 288 llvm::MachO::S_ATTR_DEBUG; 289} 290 291inline bool isWordLiteralSection(uint32_t flags) { 292 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || 293 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || 294 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; 295} 296 297bool isCodeSection(const InputSection *); 298bool isCfStringSection(const InputSection *); 299bool isClassRefsSection(const InputSection *); 300bool isSelRefsSection(const InputSection *); 301bool isEhFrameSection(const InputSection *); 302bool isGccExceptTabSection(const InputSection *); 303 304extern std::vector<ConcatInputSection *> inputSections; 305 306namespace section_names { 307 308constexpr const char authGot[] = "__auth_got"; 309constexpr const char authPtr[] = "__auth_ptr"; 310constexpr const char binding[] = "__binding"; 311constexpr const char bitcodeBundle[] = "__bundle"; 312constexpr const char cString[] = "__cstring"; 313constexpr const char cfString[] = "__cfstring"; 314constexpr const char cgProfile[] = "__cg_profile"; 315constexpr const char chainFixups[] = "__chainfixups"; 316constexpr const char codeSignature[] = "__code_signature"; 317constexpr const char common[] = "__common"; 318constexpr const char compactUnwind[] = "__compact_unwind"; 319constexpr const char data[] = "__data"; 320constexpr const char debugAbbrev[] = "__debug_abbrev"; 321constexpr const char debugInfo[] = "__debug_info"; 322constexpr const char debugLine[] = "__debug_line"; 323constexpr const char debugStr[] = "__debug_str"; 324constexpr const char debugStrOffs[] = "__debug_str_offs"; 325constexpr const char ehFrame[] = "__eh_frame"; 326constexpr const char gccExceptTab[] = "__gcc_except_tab"; 327constexpr const char export_[] = "__export"; 328constexpr const char dataInCode[] = "__data_in_code"; 329constexpr const char functionStarts[] = "__func_starts"; 330constexpr const char got[] = "__got"; 331constexpr const char header[] = "__mach_header"; 332constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; 333constexpr const char initOffsets[] = "__init_offsets"; 334constexpr const char const_[] = "__const"; 335constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; 336constexpr const char lazyBinding[] = "__lazy_binding"; 337constexpr const char literals[] = "__literals"; 338constexpr const char moduleInitFunc[] = "__mod_init_func"; 339constexpr const char moduleTermFunc[] = "__mod_term_func"; 340constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; 341constexpr const char objcCatList[] = "__objc_catlist"; 342constexpr const char objcClassList[] = "__objc_classlist"; 343constexpr const char objcClassRefs[] = "__objc_classrefs"; 344constexpr const char objcConst[] = "__objc_const"; 345constexpr const char objCImageInfo[] = "__objc_imageinfo"; 346constexpr const char objcStubs[] = "__objc_stubs"; 347constexpr const char objcSelrefs[] = "__objc_selrefs"; 348constexpr const char objcMethname[] = "__objc_methname"; 349constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; 350constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; 351constexpr const char objcProtoList[] = "__objc_protolist"; 352constexpr const char pageZero[] = "__pagezero"; 353constexpr const char pointers[] = "__pointers"; 354constexpr const char rebase[] = "__rebase"; 355constexpr const char staticInit[] = "__StaticInit"; 356constexpr const char stringTable[] = "__string_table"; 357constexpr const char stubHelper[] = "__stub_helper"; 358constexpr const char stubs[] = "__stubs"; 359constexpr const char swift[] = "__swift"; 360constexpr const char symbolTable[] = "__symbol_table"; 361constexpr const char textCoalNt[] = "__textcoal_nt"; 362constexpr const char text[] = "__text"; 363constexpr const char threadPtrs[] = "__thread_ptrs"; 364constexpr const char threadVars[] = "__thread_vars"; 365constexpr const char unwindInfo[] = "__unwind_info"; 366constexpr const char weakBinding[] = "__weak_binding"; 367constexpr const char zeroFill[] = "__zerofill"; 368constexpr const char addrSig[] = "__llvm_addrsig"; 369 370} // namespace section_names 371 372} // namespace macho 373 374std::string toString(const macho::InputSection *); 375 376} // namespace lld 377 378#endif 379