Internalize.cpp revision 360784
1//===-- Internalize.cpp - Mark functions internal -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass loops over all of the functions and variables in the input module.
10// If the function or variable does not need to be preserved according to the
11// client supplied callback, it is marked as internal.
12//
13// This transformation would not be legal in a regular compilation, but it gets
14// extra information from the linker about what is safe.
15//
16// For example: Internalizing a function with external linkage. Only if we are
17// told it is only used from within this module, it is safe to do it.
18//
19//===----------------------------------------------------------------------===//
20
21#include "llvm/Transforms/IPO/Internalize.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/Analysis/CallGraph.h"
26#include "llvm/IR/Module.h"
27#include "llvm/InitializePasses.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/LineIterator.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/Transforms/IPO.h"
35#include "llvm/Transforms/Utils/GlobalStatus.h"
36using namespace llvm;
37
38#define DEBUG_TYPE "internalize"
39
40STATISTIC(NumAliases, "Number of aliases internalized");
41STATISTIC(NumFunctions, "Number of functions internalized");
42STATISTIC(NumGlobals, "Number of global vars internalized");
43
44// APIFile - A file which contains a list of symbols that should not be marked
45// external.
46static cl::opt<std::string>
47    APIFile("internalize-public-api-file", cl::value_desc("filename"),
48            cl::desc("A file containing list of symbol names to preserve"));
49
50// APIList - A list of symbols that should not be marked internal.
51static cl::list<std::string>
52    APIList("internalize-public-api-list", cl::value_desc("list"),
53            cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54
55namespace {
56// Helper to load an API list to preserve from file and expose it as a functor
57// for internalization.
58class PreserveAPIList {
59public:
60  PreserveAPIList() {
61    if (!APIFile.empty())
62      LoadFile(APIFile);
63    ExternalNames.insert(APIList.begin(), APIList.end());
64  }
65
66  bool operator()(const GlobalValue &GV) {
67    return ExternalNames.count(GV.getName());
68  }
69
70private:
71  // Contains the set of symbols loaded from file
72  StringSet<> ExternalNames;
73
74  void LoadFile(StringRef Filename) {
75    // Load the APIFile...
76    ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
77        MemoryBuffer::getFile(Filename);
78    if (!Buf) {
79      errs() << "WARNING: Internalize couldn't load file '" << Filename
80             << "'! Continuing as if it's empty.\n";
81      return; // Just continue as if the file were empty
82    }
83    for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
84      ExternalNames.insert(*I);
85  }
86};
87} // end anonymous namespace
88
89bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
90  // Function must be defined here
91  if (GV.isDeclaration())
92    return true;
93
94  // Available externally is really just a "declaration with a body".
95  if (GV.hasAvailableExternallyLinkage())
96    return true;
97
98  // Assume that dllexported symbols are referenced elsewhere
99  if (GV.hasDLLExportStorageClass())
100    return true;
101
102  // Already local, has nothing to do.
103  if (GV.hasLocalLinkage())
104    return false;
105
106  // Check some special cases
107  if (AlwaysPreserved.count(GV.getName()))
108    return true;
109
110  return MustPreserveGV(GV);
111}
112
113bool InternalizePass::maybeInternalize(
114    GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
115  if (Comdat *C = GV.getComdat()) {
116    if (ExternalComdats.count(C))
117      return false;
118
119    // If a comdat is not externally visible we can drop it.
120    if (auto GO = dyn_cast<GlobalObject>(&GV))
121      GO->setComdat(nullptr);
122
123    if (GV.hasLocalLinkage())
124      return false;
125  } else {
126    if (GV.hasLocalLinkage())
127      return false;
128
129    if (shouldPreserveGV(GV))
130      return false;
131  }
132
133  GV.setVisibility(GlobalValue::DefaultVisibility);
134  GV.setLinkage(GlobalValue::InternalLinkage);
135  return true;
136}
137
138// If GV is part of a comdat and is externally visible, keep track of its
139// comdat so that we don't internalize any of its members.
140void InternalizePass::checkComdatVisibility(
141    GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
142  Comdat *C = GV.getComdat();
143  if (!C)
144    return;
145
146  if (shouldPreserveGV(GV))
147    ExternalComdats.insert(C);
148}
149
150bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
151  bool Changed = false;
152  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
153
154  SmallPtrSet<GlobalValue *, 8> Used;
155  collectUsedGlobalVariables(M, Used, false);
156
157  // Collect comdat visiblity information for the module.
158  DenseSet<const Comdat *> ExternalComdats;
159  if (!M.getComdatSymbolTable().empty()) {
160    for (Function &F : M)
161      checkComdatVisibility(F, ExternalComdats);
162    for (GlobalVariable &GV : M.globals())
163      checkComdatVisibility(GV, ExternalComdats);
164    for (GlobalAlias &GA : M.aliases())
165      checkComdatVisibility(GA, ExternalComdats);
166  }
167
168  // We must assume that globals in llvm.used have a reference that not even
169  // the linker can see, so we don't internalize them.
170  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
171  // linker can drop those symbols. If this pass is running as part of LTO,
172  // one might think that it could just drop llvm.compiler.used. The problem
173  // is that even in LTO llvm doesn't see every reference. For example,
174  // we don't see references from function local inline assembly. To be
175  // conservative, we internalize symbols in llvm.compiler.used, but we
176  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
177  for (GlobalValue *V : Used) {
178    AlwaysPreserved.insert(V->getName());
179  }
180
181  // Mark all functions not in the api as internal.
182  for (Function &I : M) {
183    if (!maybeInternalize(I, ExternalComdats))
184      continue;
185    Changed = true;
186
187    if (ExternalNode)
188      // Remove a callgraph edge from the external node to this function.
189      ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
190
191    ++NumFunctions;
192    LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
193  }
194
195  // Never internalize the llvm.used symbol.  It is used to implement
196  // attribute((used)).
197  // FIXME: Shouldn't this just filter on llvm.metadata section??
198  AlwaysPreserved.insert("llvm.used");
199  AlwaysPreserved.insert("llvm.compiler.used");
200
201  // Never internalize anchors used by the machine module info, else the info
202  // won't find them.  (see MachineModuleInfo.)
203  AlwaysPreserved.insert("llvm.global_ctors");
204  AlwaysPreserved.insert("llvm.global_dtors");
205  AlwaysPreserved.insert("llvm.global.annotations");
206
207  // Never internalize symbols code-gen inserts.
208  // FIXME: We should probably add this (and the __stack_chk_guard) via some
209  // type of call-back in CodeGen.
210  AlwaysPreserved.insert("__stack_chk_fail");
211  AlwaysPreserved.insert("__stack_chk_guard");
212
213  // Mark all global variables with initializers that are not in the api as
214  // internal as well.
215  for (auto &GV : M.globals()) {
216    if (!maybeInternalize(GV, ExternalComdats))
217      continue;
218    Changed = true;
219
220    ++NumGlobals;
221    LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
222  }
223
224  // Mark all aliases that are not in the api as internal as well.
225  for (auto &GA : M.aliases()) {
226    if (!maybeInternalize(GA, ExternalComdats))
227      continue;
228    Changed = true;
229
230    ++NumAliases;
231    LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
232  }
233
234  return Changed;
235}
236
237InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
238
239PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
240  if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
241    return PreservedAnalyses::all();
242
243  PreservedAnalyses PA;
244  PA.preserve<CallGraphAnalysis>();
245  return PA;
246}
247
248namespace {
249class InternalizeLegacyPass : public ModulePass {
250  // Client supplied callback to control wheter a symbol must be preserved.
251  std::function<bool(const GlobalValue &)> MustPreserveGV;
252
253public:
254  static char ID; // Pass identification, replacement for typeid
255
256  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
257
258  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
259      : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
260    initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
261  }
262
263  bool runOnModule(Module &M) override {
264    if (skipModule(M))
265      return false;
266
267    CallGraphWrapperPass *CGPass =
268        getAnalysisIfAvailable<CallGraphWrapperPass>();
269    CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
270    return internalizeModule(M, MustPreserveGV, CG);
271  }
272
273  void getAnalysisUsage(AnalysisUsage &AU) const override {
274    AU.setPreservesCFG();
275    AU.addPreserved<CallGraphWrapperPass>();
276  }
277};
278}
279
280char InternalizeLegacyPass::ID = 0;
281INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
282                "Internalize Global Symbols", false, false)
283
284ModulePass *llvm::createInternalizePass() {
285  return new InternalizeLegacyPass();
286}
287
288ModulePass *llvm::createInternalizePass(
289    std::function<bool(const GlobalValue &)> MustPreserveGV) {
290  return new InternalizeLegacyPass(std::move(MustPreserveGV));
291}
292