1//===- SymbolTable.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "SymbolTable.h"
10#include "COFFLinkerContext.h"
11#include "Config.h"
12#include "Driver.h"
13#include "LTO.h"
14#include "PDB.h"
15#include "Symbols.h"
16#include "lld/Common/ErrorHandler.h"
17#include "lld/Common/Memory.h"
18#include "lld/Common/Timer.h"
19#include "llvm/DebugInfo/DIContext.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/WindowsMachineFlag.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_ostream.h"
25#include <utility>
26
27using namespace llvm;
28
29namespace lld::coff {
30
31StringRef ltrim1(StringRef s, const char *chars) {
32  if (!s.empty() && strchr(chars, s[0]))
33    return s.substr(1);
34  return s;
35}
36
37static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
38  if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
39    return true;
40  switch (ctx.config.machine) {
41  case ARM64:
42    return mt == ARM64 || mt == ARM64X;
43  case ARM64EC:
44    return COFF::isArm64EC(mt) || mt == AMD64;
45  case ARM64X:
46    return COFF::isAnyArm64(mt) || mt == AMD64;
47  default:
48    return ctx.config.machine == mt;
49  }
50}
51
52void SymbolTable::addFile(InputFile *file) {
53  log("Reading " + toString(file));
54  if (file->lazy) {
55    if (auto *f = dyn_cast<BitcodeFile>(file))
56      f->parseLazy();
57    else
58      cast<ObjFile>(file)->parseLazy();
59  } else {
60    file->parse();
61    if (auto *f = dyn_cast<ObjFile>(file)) {
62      ctx.objFileInstances.push_back(f);
63    } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
64      if (ltoCompilationDone) {
65        error("LTO object file " + toString(file) + " linked in after "
66              "doing LTO compilation.");
67      }
68      ctx.bitcodeFileInstances.push_back(f);
69    } else if (auto *f = dyn_cast<ImportFile>(file)) {
70      ctx.importFileInstances.push_back(f);
71    }
72  }
73
74  MachineTypes mt = file->getMachineType();
75  if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
76    ctx.config.machine = mt;
77    ctx.driver.addWinSysRootLibSearchPaths();
78  } else if (!compatibleMachineType(ctx, mt)) {
79    error(toString(file) + ": machine type " + machineToStr(mt) +
80          " conflicts with " + machineToStr(ctx.config.machine));
81    return;
82  }
83
84  ctx.driver.parseDirectives(file);
85}
86
87static void errorOrWarn(const Twine &s, bool forceUnresolved) {
88  if (forceUnresolved)
89    warn(s);
90  else
91    error(s);
92}
93
94// Causes the file associated with a lazy symbol to be linked in.
95static void forceLazy(Symbol *s) {
96  s->pendingArchiveLoad = true;
97  switch (s->kind()) {
98  case Symbol::Kind::LazyArchiveKind: {
99    auto *l = cast<LazyArchive>(s);
100    l->file->addMember(l->sym);
101    break;
102  }
103  case Symbol::Kind::LazyObjectKind: {
104    InputFile *file = cast<LazyObject>(s)->file;
105    file->ctx.symtab.addFile(file);
106    break;
107  }
108  case Symbol::Kind::LazyDLLSymbolKind: {
109    auto *l = cast<LazyDLLSymbol>(s);
110    l->file->makeImport(l->sym);
111    break;
112  }
113  default:
114    llvm_unreachable(
115        "symbol passed to forceLazy is not a LazyArchive or LazyObject");
116  }
117}
118
119// Returns the symbol in SC whose value is <= Addr that is closest to Addr.
120// This is generally the global variable or function whose definition contains
121// Addr.
122static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
123  DefinedRegular *candidate = nullptr;
124
125  for (Symbol *s : sc->file->getSymbols()) {
126    auto *d = dyn_cast_or_null<DefinedRegular>(s);
127    if (!d || !d->data || d->file != sc->file || d->getChunk() != sc ||
128        d->getValue() > addr ||
129        (candidate && d->getValue() < candidate->getValue()))
130      continue;
131
132    candidate = d;
133  }
134
135  return candidate;
136}
137
138static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
139  std::string res("\n>>> referenced by ");
140  StringRef source = file->obj->getSourceFileName();
141  if (!source.empty())
142    res += source.str() + "\n>>>               ";
143  res += toString(file);
144  return {res};
145}
146
147static std::optional<std::pair<StringRef, uint32_t>>
148getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
149  std::optional<DILineInfo> optionalLineInfo =
150      c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
151  if (!optionalLineInfo)
152    return std::nullopt;
153  const DILineInfo &lineInfo = *optionalLineInfo;
154  if (lineInfo.FileName == DILineInfo::BadString)
155    return std::nullopt;
156  return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
157}
158
159static std::optional<std::pair<StringRef, uint32_t>>
160getFileLine(const SectionChunk *c, uint32_t addr) {
161  // MinGW can optionally use codeview, even if the default is dwarf.
162  std::optional<std::pair<StringRef, uint32_t>> fileLine =
163      getFileLineCodeView(c, addr);
164  // If codeview didn't yield any result, check dwarf in MinGW mode.
165  if (!fileLine && c->file->ctx.config.mingw)
166    fileLine = getFileLineDwarf(c, addr);
167  return fileLine;
168}
169
170// Given a file and the index of a symbol in that file, returns a description
171// of all references to that symbol from that file. If no debug information is
172// available, returns just the name of the file, else one string per actual
173// reference as described in the debug info.
174// Returns up to maxStrings string descriptions, along with the total number of
175// locations found.
176static std::pair<std::vector<std::string>, size_t>
177getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
178  struct Location {
179    Symbol *sym;
180    std::pair<StringRef, uint32_t> fileLine;
181  };
182  std::vector<Location> locations;
183  size_t numLocations = 0;
184
185  for (Chunk *c : file->getChunks()) {
186    auto *sc = dyn_cast<SectionChunk>(c);
187    if (!sc)
188      continue;
189    for (const coff_relocation &r : sc->getRelocs()) {
190      if (r.SymbolTableIndex != symIndex)
191        continue;
192      numLocations++;
193      if (locations.size() >= maxStrings)
194        continue;
195
196      std::optional<std::pair<StringRef, uint32_t>> fileLine =
197          getFileLine(sc, r.VirtualAddress);
198      Symbol *sym = getSymbol(sc, r.VirtualAddress);
199      if (fileLine)
200        locations.push_back({sym, *fileLine});
201      else if (sym)
202        locations.push_back({sym, {"", 0}});
203    }
204  }
205
206  if (maxStrings == 0)
207    return std::make_pair(std::vector<std::string>(), numLocations);
208
209  if (numLocations == 0)
210    return std::make_pair(
211        std::vector<std::string>{"\n>>> referenced by " + toString(file)}, 1);
212
213  std::vector<std::string> symbolLocations(locations.size());
214  size_t i = 0;
215  for (Location loc : locations) {
216    llvm::raw_string_ostream os(symbolLocations[i++]);
217    os << "\n>>> referenced by ";
218    if (!loc.fileLine.first.empty())
219      os << loc.fileLine.first << ":" << loc.fileLine.second
220         << "\n>>>               ";
221    os << toString(file);
222    if (loc.sym)
223      os << ":(" << toString(file->ctx, *loc.sym) << ')';
224  }
225  return std::make_pair(symbolLocations, numLocations);
226}
227
228std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
229  return getSymbolLocations(file, symIndex, SIZE_MAX).first;
230}
231
232static std::pair<std::vector<std::string>, size_t>
233getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
234  if (auto *o = dyn_cast<ObjFile>(file))
235    return getSymbolLocations(o, symIndex, maxStrings);
236  if (auto *b = dyn_cast<BitcodeFile>(file)) {
237    std::vector<std::string> symbolLocations = getSymbolLocations(b);
238    size_t numLocations = symbolLocations.size();
239    if (symbolLocations.size() > maxStrings)
240      symbolLocations.resize(maxStrings);
241    return std::make_pair(symbolLocations, numLocations);
242  }
243  llvm_unreachable("unsupported file type passed to getSymbolLocations");
244  return std::make_pair(std::vector<std::string>(), (size_t)0);
245}
246
247// For an undefined symbol, stores all files referencing it and the index of
248// the undefined symbol in each file.
249struct UndefinedDiag {
250  Symbol *sym;
251  struct File {
252    InputFile *file;
253    uint32_t symIndex;
254  };
255  std::vector<File> files;
256};
257
258static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
259                                  const UndefinedDiag &undefDiag) {
260  std::string out;
261  llvm::raw_string_ostream os(out);
262  os << "undefined symbol: " << toString(ctx, *undefDiag.sym);
263
264  const size_t maxUndefReferences = 3;
265  size_t numDisplayedRefs = 0, numRefs = 0;
266  for (const UndefinedDiag::File &ref : undefDiag.files) {
267    auto [symbolLocations, totalLocations] = getSymbolLocations(
268        ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs);
269
270    numRefs += totalLocations;
271    numDisplayedRefs += symbolLocations.size();
272    for (const std::string &s : symbolLocations) {
273      os << s;
274    }
275  }
276  if (numDisplayedRefs < numRefs)
277    os << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times";
278  errorOrWarn(os.str(), ctx.config.forceUnresolved);
279}
280
281void SymbolTable::loadMinGWSymbols() {
282  for (auto &i : symMap) {
283    Symbol *sym = i.second;
284    auto *undef = dyn_cast<Undefined>(sym);
285    if (!undef)
286      continue;
287    if (undef->getWeakAlias())
288      continue;
289
290    StringRef name = undef->getName();
291
292    if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
293      // Check if we can resolve an undefined decorated symbol by finding
294      // the intended target as an undecorated symbol (only with a leading
295      // underscore).
296      StringRef origName = name;
297      StringRef baseName = name;
298      // Trim down stdcall/fastcall/vectorcall symbols to the base name.
299      baseName = ltrim1(baseName, "_@");
300      baseName = baseName.substr(0, baseName.find('@'));
301      // Add a leading underscore, as it would be in cdecl form.
302      std::string newName = ("_" + baseName).str();
303      Symbol *l;
304      if (newName != origName && (l = find(newName)) != nullptr) {
305        // If we found a symbol and it is lazy; load it.
306        if (l->isLazy() && !l->pendingArchiveLoad) {
307          log("Loading lazy " + l->getName() + " from " +
308              l->getFile()->getName() + " for stdcall fixup");
309          forceLazy(l);
310        }
311        // If it's lazy or already defined, hook it up as weak alias.
312        if (l->isLazy() || isa<Defined>(l)) {
313          if (ctx.config.warnStdcallFixup)
314            warn("Resolving " + origName + " by linking to " + newName);
315          else
316            log("Resolving " + origName + " by linking to " + newName);
317          undef->weakAlias = l;
318          continue;
319        }
320      }
321    }
322
323    if (ctx.config.autoImport) {
324      if (name.starts_with("__imp_"))
325        continue;
326      // If we have an undefined symbol, but we have a lazy symbol we could
327      // load, load it.
328      Symbol *l = find(("__imp_" + name).str());
329      if (!l || l->pendingArchiveLoad || !l->isLazy())
330        continue;
331
332      log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
333          " for automatic import");
334      forceLazy(l);
335    }
336  }
337}
338
339Defined *SymbolTable::impSymbol(StringRef name) {
340  if (name.starts_with("__imp_"))
341    return nullptr;
342  return dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
343}
344
345bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
346  Defined *imp = impSymbol(name);
347  if (!imp)
348    return false;
349
350  // Replace the reference directly to a variable with a reference
351  // to the import address table instead. This obviously isn't right,
352  // but we mark the symbol as isRuntimePseudoReloc, and a later pass
353  // will add runtime pseudo relocations for every relocation against
354  // this Symbol. The runtime pseudo relocation framework expects the
355  // reference itself to point at the IAT entry.
356  size_t impSize = 0;
357  if (isa<DefinedImportData>(imp)) {
358    log("Automatically importing " + name + " from " +
359        cast<DefinedImportData>(imp)->getDLLName());
360    impSize = sizeof(DefinedImportData);
361  } else if (isa<DefinedRegular>(imp)) {
362    log("Automatically importing " + name + " from " +
363        toString(cast<DefinedRegular>(imp)->file));
364    impSize = sizeof(DefinedRegular);
365  } else {
366    warn("unable to automatically import " + name + " from " + imp->getName() +
367         " from " + toString(cast<DefinedRegular>(imp)->file) +
368         "; unexpected symbol type");
369    return false;
370  }
371  sym->replaceKeepingName(imp, impSize);
372  sym->isRuntimePseudoReloc = true;
373
374  // There may exist symbols named .refptr.<name> which only consist
375  // of a single pointer to <name>. If it turns out <name> is
376  // automatically imported, we don't need to keep the .refptr.<name>
377  // pointer at all, but redirect all accesses to it to the IAT entry
378  // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
379  DefinedRegular *refptr =
380      dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
381  if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) {
382    SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
383    if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
384      log("Replacing .refptr." + name + " with " + imp->getName());
385      refptr->getChunk()->live = false;
386      refptr->replaceKeepingName(imp, impSize);
387    }
388  }
389  return true;
390}
391
392/// Helper function for reportUnresolvable and resolveRemainingUndefines.
393/// This function emits an "undefined symbol" diagnostic for each symbol in
394/// undefs. If localImports is not nullptr, it also emits a "locally
395/// defined symbol imported" diagnostic for symbols in localImports.
396/// objFiles and bitcodeFiles (if not nullptr) are used to report where
397/// undefined symbols are referenced.
398static void reportProblemSymbols(
399    const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
400    const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
401  // Return early if there is nothing to report (which should be
402  // the common case).
403  if (undefs.empty() && (!localImports || localImports->empty()))
404    return;
405
406  for (Symbol *b : ctx.config.gcroot) {
407    if (undefs.count(b))
408      errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
409                  ctx.config.forceUnresolved);
410    if (localImports)
411      if (Symbol *imp = localImports->lookup(b))
412        warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
413             " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
414  }
415
416  std::vector<UndefinedDiag> undefDiags;
417  DenseMap<Symbol *, int> firstDiag;
418
419  auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
420    uint32_t symIndex = (uint32_t)-1;
421    for (Symbol *sym : symbols) {
422      ++symIndex;
423      if (!sym)
424        continue;
425      if (undefs.count(sym)) {
426        auto it = firstDiag.find(sym);
427        if (it == firstDiag.end()) {
428          firstDiag[sym] = undefDiags.size();
429          undefDiags.push_back({sym, {{file, symIndex}}});
430        } else {
431          undefDiags[it->second].files.push_back({file, symIndex});
432        }
433      }
434      if (localImports)
435        if (Symbol *imp = localImports->lookup(sym))
436          warn(toString(file) +
437               ": locally defined symbol imported: " + toString(ctx, *imp) +
438               " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
439    }
440  };
441
442  for (ObjFile *file : ctx.objFileInstances)
443    processFile(file, file->getSymbols());
444
445  if (needBitcodeFiles)
446    for (BitcodeFile *file : ctx.bitcodeFileInstances)
447      processFile(file, file->getSymbols());
448
449  for (const UndefinedDiag &undefDiag : undefDiags)
450    reportUndefinedSymbol(ctx, undefDiag);
451}
452
453void SymbolTable::reportUnresolvable() {
454  SmallPtrSet<Symbol *, 8> undefs;
455  for (auto &i : symMap) {
456    Symbol *sym = i.second;
457    auto *undef = dyn_cast<Undefined>(sym);
458    if (!undef || sym->deferUndefined)
459      continue;
460    if (undef->getWeakAlias())
461      continue;
462    StringRef name = undef->getName();
463    if (name.starts_with("__imp_")) {
464      Symbol *imp = find(name.substr(strlen("__imp_")));
465      if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
466        def->isUsedInRegularObj = true;
467        continue;
468      }
469    }
470    if (name.contains("_PchSym_"))
471      continue;
472    if (ctx.config.autoImport && impSymbol(name))
473      continue;
474    undefs.insert(sym);
475  }
476
477  reportProblemSymbols(ctx, undefs,
478                       /* localImports */ nullptr, true);
479}
480
481void SymbolTable::resolveRemainingUndefines() {
482  llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
483  SmallPtrSet<Symbol *, 8> undefs;
484  DenseMap<Symbol *, Symbol *> localImports;
485
486  for (auto &i : symMap) {
487    Symbol *sym = i.second;
488    auto *undef = dyn_cast<Undefined>(sym);
489    if (!undef)
490      continue;
491    if (!sym->isUsedInRegularObj)
492      continue;
493
494    StringRef name = undef->getName();
495
496    // A weak alias may have been resolved, so check for that.
497    if (Defined *d = undef->getWeakAlias()) {
498      // We want to replace Sym with D. However, we can't just blindly
499      // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
500      // internal symbol, and internal symbols are stored as "unparented"
501      // Symbols. For that reason we need to check which type of symbol we
502      // are dealing with and copy the correct number of bytes.
503      if (isa<DefinedRegular>(d))
504        memcpy(sym, d, sizeof(DefinedRegular));
505      else if (isa<DefinedAbsolute>(d))
506        memcpy(sym, d, sizeof(DefinedAbsolute));
507      else
508        memcpy(sym, d, sizeof(SymbolUnion));
509      continue;
510    }
511
512    // If we can resolve a symbol by removing __imp_ prefix, do that.
513    // This odd rule is for compatibility with MSVC linker.
514    if (name.starts_with("__imp_")) {
515      Symbol *imp = find(name.substr(strlen("__imp_")));
516      if (imp && isa<Defined>(imp)) {
517        auto *d = cast<Defined>(imp);
518        replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
519        localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
520        localImports[sym] = d;
521        continue;
522      }
523    }
524
525    // We don't want to report missing Microsoft precompiled headers symbols.
526    // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
527    if (name.contains("_PchSym_"))
528      continue;
529
530    if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
531      continue;
532
533    // Remaining undefined symbols are not fatal if /force is specified.
534    // They are replaced with dummy defined symbols.
535    if (ctx.config.forceUnresolved)
536      replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
537    undefs.insert(sym);
538  }
539
540  reportProblemSymbols(
541      ctx, undefs,
542      ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
543}
544
545std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
546  bool inserted = false;
547  Symbol *&sym = symMap[CachedHashStringRef(name)];
548  if (!sym) {
549    sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
550    sym->isUsedInRegularObj = false;
551    sym->pendingArchiveLoad = false;
552    sym->canInline = true;
553    inserted = true;
554  }
555  return {sym, inserted};
556}
557
558std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
559  std::pair<Symbol *, bool> result = insert(name);
560  if (!file || !isa<BitcodeFile>(file))
561    result.first->isUsedInRegularObj = true;
562  return result;
563}
564
565Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
566                                  bool isWeakAlias) {
567  auto [s, wasInserted] = insert(name, f);
568  if (wasInserted || (s->isLazy() && isWeakAlias)) {
569    replaceSymbol<Undefined>(s, name);
570    return s;
571  }
572  if (s->isLazy())
573    forceLazy(s);
574  return s;
575}
576
577void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
578  StringRef name = sym.getName();
579  auto [s, wasInserted] = insert(name);
580  if (wasInserted) {
581    replaceSymbol<LazyArchive>(s, f, sym);
582    return;
583  }
584  auto *u = dyn_cast<Undefined>(s);
585  if (!u || u->weakAlias || s->pendingArchiveLoad)
586    return;
587  s->pendingArchiveLoad = true;
588  f->addMember(sym);
589}
590
591void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
592  assert(f->lazy);
593  auto [s, wasInserted] = insert(n, f);
594  if (wasInserted) {
595    replaceSymbol<LazyObject>(s, f, n);
596    return;
597  }
598  auto *u = dyn_cast<Undefined>(s);
599  if (!u || u->weakAlias || s->pendingArchiveLoad)
600    return;
601  s->pendingArchiveLoad = true;
602  f->lazy = false;
603  addFile(f);
604}
605
606void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
607                                   StringRef n) {
608  auto [s, wasInserted] = insert(n);
609  if (wasInserted) {
610    replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
611    return;
612  }
613  auto *u = dyn_cast<Undefined>(s);
614  if (!u || u->weakAlias || s->pendingArchiveLoad)
615    return;
616  s->pendingArchiveLoad = true;
617  f->makeImport(sym);
618}
619
620static std::string getSourceLocationBitcode(BitcodeFile *file) {
621  std::string res("\n>>> defined at ");
622  StringRef source = file->obj->getSourceFileName();
623  if (!source.empty())
624    res += source.str() + "\n>>>            ";
625  res += toString(file);
626  return res;
627}
628
629static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
630                                        uint32_t offset, StringRef name) {
631  std::optional<std::pair<StringRef, uint32_t>> fileLine;
632  if (sc)
633    fileLine = getFileLine(sc, offset);
634  if (!fileLine)
635    fileLine = file->getVariableLocation(name);
636
637  std::string res;
638  llvm::raw_string_ostream os(res);
639  os << "\n>>> defined at ";
640  if (fileLine)
641    os << fileLine->first << ":" << fileLine->second << "\n>>>            ";
642  os << toString(file);
643  return os.str();
644}
645
646static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
647                                     uint32_t offset, StringRef name) {
648  if (!file)
649    return "";
650  if (auto *o = dyn_cast<ObjFile>(file))
651    return getSourceLocationObj(o, sc, offset, name);
652  if (auto *b = dyn_cast<BitcodeFile>(file))
653    return getSourceLocationBitcode(b);
654  return "\n>>> defined at " + toString(file);
655}
656
657// Construct and print an error message in the form of:
658//
659//   lld-link: error: duplicate symbol: foo
660//   >>> defined at bar.c:30
661//   >>>            bar.o
662//   >>> defined at baz.c:563
663//   >>>            baz.o
664void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
665                                  SectionChunk *newSc,
666                                  uint32_t newSectionOffset) {
667  std::string msg;
668  llvm::raw_string_ostream os(msg);
669  os << "duplicate symbol: " << toString(ctx, *existing);
670
671  DefinedRegular *d = dyn_cast<DefinedRegular>(existing);
672  if (d && isa<ObjFile>(d->getFile())) {
673    os << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
674                            existing->getName());
675  } else {
676    os << getSourceLocation(existing->getFile(), nullptr, 0, "");
677  }
678  os << getSourceLocation(newFile, newSc, newSectionOffset,
679                          existing->getName());
680
681  if (ctx.config.forceMultiple)
682    warn(os.str());
683  else
684    error(os.str());
685}
686
687Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
688  auto [s, wasInserted] = insert(n, nullptr);
689  s->isUsedInRegularObj = true;
690  if (wasInserted || isa<Undefined>(s) || s->isLazy())
691    replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
692  else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
693    if (da->getVA() != sym.getValue())
694      reportDuplicate(s, nullptr);
695  } else if (!isa<DefinedCOFF>(s))
696    reportDuplicate(s, nullptr);
697  return s;
698}
699
700Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
701  auto [s, wasInserted] = insert(n, nullptr);
702  s->isUsedInRegularObj = true;
703  if (wasInserted || isa<Undefined>(s) || s->isLazy())
704    replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
705  else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
706    if (da->getVA() != va)
707      reportDuplicate(s, nullptr);
708  } else if (!isa<DefinedCOFF>(s))
709    reportDuplicate(s, nullptr);
710  return s;
711}
712
713Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
714  auto [s, wasInserted] = insert(n, nullptr);
715  s->isUsedInRegularObj = true;
716  if (wasInserted || isa<Undefined>(s) || s->isLazy())
717    replaceSymbol<DefinedSynthetic>(s, n, c);
718  else if (!isa<DefinedCOFF>(s))
719    reportDuplicate(s, nullptr);
720  return s;
721}
722
723Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
724                                const coff_symbol_generic *sym, SectionChunk *c,
725                                uint32_t sectionOffset, bool isWeak) {
726  auto [s, wasInserted] = insert(n, f);
727  if (wasInserted || !isa<DefinedRegular>(s) || s->isWeak)
728    replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
729                                  /*IsExternal*/ true, sym, c, isWeak);
730  else if (!isWeak)
731    reportDuplicate(s, f, c, sectionOffset);
732  return s;
733}
734
735std::pair<DefinedRegular *, bool>
736SymbolTable::addComdat(InputFile *f, StringRef n,
737                       const coff_symbol_generic *sym) {
738  auto [s, wasInserted] = insert(n, f);
739  if (wasInserted || !isa<DefinedRegular>(s)) {
740    replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
741                                  /*IsExternal*/ true, sym, nullptr);
742    return {cast<DefinedRegular>(s), true};
743  }
744  auto *existingSymbol = cast<DefinedRegular>(s);
745  if (!existingSymbol->isCOMDAT)
746    reportDuplicate(s, f);
747  return {existingSymbol, false};
748}
749
750Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
751                               const coff_symbol_generic *sym, CommonChunk *c) {
752  auto [s, wasInserted] = insert(n, f);
753  if (wasInserted || !isa<DefinedCOFF>(s))
754    replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
755  else if (auto *dc = dyn_cast<DefinedCommon>(s))
756    if (size > dc->getSize())
757      replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
758  return s;
759}
760
761Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
762  auto [s, wasInserted] = insert(n, nullptr);
763  s->isUsedInRegularObj = true;
764  if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
765    replaceSymbol<DefinedImportData>(s, n, f);
766    return s;
767  }
768
769  reportDuplicate(s, f);
770  return nullptr;
771}
772
773Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
774                                    uint16_t machine) {
775  auto [s, wasInserted] = insert(name, nullptr);
776  s->isUsedInRegularObj = true;
777  if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
778    replaceSymbol<DefinedImportThunk>(s, ctx, name, id, machine);
779    return s;
780  }
781
782  reportDuplicate(s, id->file);
783  return nullptr;
784}
785
786void SymbolTable::addLibcall(StringRef name) {
787  Symbol *sym = findUnderscore(name);
788  if (!sym)
789    return;
790
791  if (auto *l = dyn_cast<LazyArchive>(sym)) {
792    MemoryBufferRef mb = l->getMemberBuffer();
793    if (isBitcode(mb))
794      addUndefined(sym->getName());
795  } else if (LazyObject *o = dyn_cast<LazyObject>(sym)) {
796    if (isBitcode(o->file->mb))
797      addUndefined(sym->getName());
798  }
799}
800
801std::vector<Chunk *> SymbolTable::getChunks() const {
802  std::vector<Chunk *> res;
803  for (ObjFile *file : ctx.objFileInstances) {
804    ArrayRef<Chunk *> v = file->getChunks();
805    res.insert(res.end(), v.begin(), v.end());
806  }
807  return res;
808}
809
810Symbol *SymbolTable::find(StringRef name) const {
811  return symMap.lookup(CachedHashStringRef(name));
812}
813
814Symbol *SymbolTable::findUnderscore(StringRef name) const {
815  if (ctx.config.machine == I386)
816    return find(("_" + name).str());
817  return find(name);
818}
819
820// Return all symbols that start with Prefix, possibly ignoring the first
821// character of Prefix or the first character symbol.
822std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
823  std::vector<Symbol *> syms;
824  for (auto pair : symMap) {
825    StringRef name = pair.first.val();
826    if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
827        name.drop_front().starts_with(prefix) ||
828        name.drop_front().starts_with(prefix.drop_front())) {
829      syms.push_back(pair.second);
830    }
831  }
832  return syms;
833}
834
835Symbol *SymbolTable::findMangle(StringRef name) {
836  if (Symbol *sym = find(name)) {
837    if (auto *u = dyn_cast<Undefined>(sym)) {
838      // We're specifically looking for weak aliases that ultimately resolve to
839      // defined symbols, hence the call to getWeakAlias() instead of just using
840      // the weakAlias member variable. This matches link.exe's behavior.
841      if (Symbol *weakAlias = u->getWeakAlias())
842        return weakAlias;
843    } else {
844      return sym;
845    }
846  }
847
848  // Efficient fuzzy string lookup is impossible with a hash table, so iterate
849  // the symbol table once and collect all possibly matching symbols into this
850  // vector. Then compare each possibly matching symbol with each possible
851  // mangling.
852  std::vector<Symbol *> syms = getSymsWithPrefix(name);
853  auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
854    std::string prefix = t.str();
855    for (auto *s : syms)
856      if (s->getName().starts_with(prefix))
857        return s;
858    return nullptr;
859  };
860
861  // For non-x86, just look for C++ functions.
862  if (ctx.config.machine != I386)
863    return findByPrefix("?" + name + "@@Y");
864
865  if (!name.starts_with("_"))
866    return nullptr;
867  // Search for x86 stdcall function.
868  if (Symbol *s = findByPrefix(name + "@"))
869    return s;
870  // Search for x86 fastcall function.
871  if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
872    return s;
873  // Search for x86 vectorcall function.
874  if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
875    return s;
876  // Search for x86 C++ non-member function.
877  return findByPrefix("?" + name.substr(1) + "@@Y");
878}
879
880Symbol *SymbolTable::addUndefined(StringRef name) {
881  return addUndefined(name, nullptr, false);
882}
883
884void SymbolTable::compileBitcodeFiles() {
885  ltoCompilationDone = true;
886  if (ctx.bitcodeFileInstances.empty())
887    return;
888
889  llvm::TimeTraceScope timeScope("Compile bitcode");
890  ScopedTimer t(ctx.ltoTimer);
891  lto.reset(new BitcodeCompiler(ctx));
892  for (BitcodeFile *f : ctx.bitcodeFileInstances)
893    lto->add(*f);
894  for (InputFile *newObj : lto->compile()) {
895    ObjFile *obj = cast<ObjFile>(newObj);
896    obj->parse();
897    ctx.objFileInstances.push_back(obj);
898  }
899}
900
901} // namespace lld::coff
902