DataFlowSanitizer.cpp revision 360784
1//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11/// analysis.
12///
13/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
15/// analysis framework to be used by clients to help detect application-specific
16/// issues within their own code.
17///
18/// The analysis is based on automatic propagation of data flow labels (also
19/// known as taint labels) through a program as it performs computation.  Each
20/// byte of application memory is backed by two bytes of shadow memory which
21/// hold the label.  On Linux/x86_64, memory is laid out as follows:
22///
23/// +--------------------+ 0x800000000000 (top of memory)
24/// | application memory |
25/// +--------------------+ 0x700000008000 (kAppAddr)
26/// |                    |
27/// |       unused       |
28/// |                    |
29/// +--------------------+ 0x200200000000 (kUnusedAddr)
30/// |    union table     |
31/// +--------------------+ 0x200000000000 (kUnionTableAddr)
32/// |   shadow memory    |
33/// +--------------------+ 0x000000010000 (kShadowAddr)
34/// | reserved by kernel |
35/// +--------------------+ 0x000000000000
36///
37/// To derive a shadow memory address from an application memory address,
38/// bits 44-46 are cleared to bring the address into the range
39/// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
40/// account for the double byte representation of shadow labels and move the
41/// address into the shadow memory range.  See the function
42/// DataFlowSanitizer::getShadowAddress below.
43///
44/// For more information, please refer to the design document:
45/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
46//
47//===----------------------------------------------------------------------===//
48
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/DenseSet.h"
51#include "llvm/ADT/DepthFirstIterator.h"
52#include "llvm/ADT/None.h"
53#include "llvm/ADT/SmallPtrSet.h"
54#include "llvm/ADT/SmallVector.h"
55#include "llvm/ADT/StringExtras.h"
56#include "llvm/ADT/StringRef.h"
57#include "llvm/ADT/Triple.h"
58#include "llvm/Analysis/ValueTracking.h"
59#include "llvm/IR/Argument.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CallSite.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DataLayout.h"
66#include "llvm/IR/DerivedTypes.h"
67#include "llvm/IR/Dominators.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/GlobalAlias.h"
70#include "llvm/IR/GlobalValue.h"
71#include "llvm/IR/GlobalVariable.h"
72#include "llvm/IR/IRBuilder.h"
73#include "llvm/IR/InlineAsm.h"
74#include "llvm/IR/InstVisitor.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/LLVMContext.h"
80#include "llvm/IR/MDBuilder.h"
81#include "llvm/IR/Module.h"
82#include "llvm/IR/Type.h"
83#include "llvm/IR/User.h"
84#include "llvm/IR/Value.h"
85#include "llvm/InitializePasses.h"
86#include "llvm/Pass.h"
87#include "llvm/Support/Casting.h"
88#include "llvm/Support/CommandLine.h"
89#include "llvm/Support/ErrorHandling.h"
90#include "llvm/Support/SpecialCaseList.h"
91#include "llvm/Support/VirtualFileSystem.h"
92#include "llvm/Transforms/Instrumentation.h"
93#include "llvm/Transforms/Utils/BasicBlockUtils.h"
94#include "llvm/Transforms/Utils/Local.h"
95#include <algorithm>
96#include <cassert>
97#include <cstddef>
98#include <cstdint>
99#include <iterator>
100#include <memory>
101#include <set>
102#include <string>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107
// Name of the external symbol used to form shadow addresses on architectures
// with multiple VMAs.  On those targets the mask is not a compile-time
// constant integer; the runtime sets the external mask from the VMA range.
static constexpr const char *kDFSanExternShadowPtrMask =
    "__dfsan_shadow_ptr_mask";
112
113// The -dfsan-preserve-alignment flag controls whether this pass assumes that
114// alignment requirements provided by the input IR are correct.  For example,
115// if the input IR contains a load with alignment 8, this flag will cause
116// the shadow load to have alignment 16.  This flag is disabled by default as
117// we have unfortunately encountered too much code (including Clang itself;
118// see PR14291) which performs misaligned access.
119static cl::opt<bool> ClPreserveAlignment(
120    "dfsan-preserve-alignment",
121    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
122    cl::init(false));
123
124// The ABI list files control how shadow parameters are passed. The pass treats
125// every function labelled "uninstrumented" in the ABI list file as conforming
126// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
127// additional annotations for those functions, a call to one of those functions
128// will produce a warning message, as the labelling behaviour of the function is
129// unknown.  The other supported annotations are "functional" and "discard",
130// which are described below under DataFlowSanitizer::WrapperKind.
131static cl::list<std::string> ClABIListFiles(
132    "dfsan-abilist",
133    cl::desc("File listing native ABI functions and how the pass treats them"),
134    cl::Hidden);
135
136// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
137// functions (see DataFlowSanitizer::InstrumentedABI below).
138static cl::opt<bool> ClArgsABI(
139    "dfsan-args-abi",
140    cl::desc("Use the argument ABI rather than the TLS ABI"),
141    cl::Hidden);
142
143// Controls whether the pass includes or ignores the labels of pointers in load
144// instructions.
145static cl::opt<bool> ClCombinePointerLabelsOnLoad(
146    "dfsan-combine-pointer-labels-on-load",
147    cl::desc("Combine the label of the pointer with the label of the data when "
148             "loading from memory."),
149    cl::Hidden, cl::init(true));
150
151// Controls whether the pass includes or ignores the labels of pointers in
152// stores instructions.
153static cl::opt<bool> ClCombinePointerLabelsOnStore(
154    "dfsan-combine-pointer-labels-on-store",
155    cl::desc("Combine the label of the pointer with the label of the data when "
156             "storing in memory."),
157    cl::Hidden, cl::init(false));
158
159static cl::opt<bool> ClDebugNonzeroLabels(
160    "dfsan-debug-nonzero-labels",
161    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
162             "load or return with a nonzero label"),
163    cl::Hidden);
164
165static StringRef GetGlobalTypeString(const GlobalValue &G) {
166  // Types of GlobalVariables are always pointer types.
167  Type *GType = G.getValueType();
168  // For now we support blacklisting struct types only.
169  if (StructType *SGType = dyn_cast<StructType>(GType)) {
170    if (!SGType->isLiteral())
171      return SGType->getName();
172  }
173  return "<unknown type>";
174}
175
176namespace {
177
178class DFSanABIList {
179  std::unique_ptr<SpecialCaseList> SCL;
180
181 public:
182  DFSanABIList() = default;
183
184  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
185
186  /// Returns whether either this function or its source file are listed in the
187  /// given category.
188  bool isIn(const Function &F, StringRef Category) const {
189    return isIn(*F.getParent(), Category) ||
190           SCL->inSection("dataflow", "fun", F.getName(), Category);
191  }
192
193  /// Returns whether this global alias is listed in the given category.
194  ///
195  /// If GA aliases a function, the alias's name is matched as a function name
196  /// would be.  Similarly, aliases of globals are matched like globals.
197  bool isIn(const GlobalAlias &GA, StringRef Category) const {
198    if (isIn(*GA.getParent(), Category))
199      return true;
200
201    if (isa<FunctionType>(GA.getValueType()))
202      return SCL->inSection("dataflow", "fun", GA.getName(), Category);
203
204    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
205           SCL->inSection("dataflow", "type", GetGlobalTypeString(GA),
206                          Category);
207  }
208
209  /// Returns whether this module is listed in the given category.
210  bool isIn(const Module &M, StringRef Category) const {
211    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
212  }
213};
214
215/// TransformedFunction is used to express the result of transforming one
216/// function type into another.  This struct is immutable.  It holds metadata
217/// useful for updating calls of the old function to the new type.
218struct TransformedFunction {
219  TransformedFunction(FunctionType* OriginalType,
220                      FunctionType* TransformedType,
221                      std::vector<unsigned> ArgumentIndexMapping)
222      : OriginalType(OriginalType),
223        TransformedType(TransformedType),
224        ArgumentIndexMapping(ArgumentIndexMapping) {}
225
226  // Disallow copies.
227  TransformedFunction(const TransformedFunction&) = delete;
228  TransformedFunction& operator=(const TransformedFunction&) = delete;
229
230  // Allow moves.
231  TransformedFunction(TransformedFunction&&) = default;
232  TransformedFunction& operator=(TransformedFunction&&) = default;
233
234  /// Type of the function before the transformation.
235  FunctionType *OriginalType;
236
237  /// Type of the function after the transformation.
238  FunctionType *TransformedType;
239
240  /// Transforming a function may change the position of arguments.  This
241  /// member records the mapping from each argument's old position to its new
242  /// position.  Argument positions are zero-indexed.  If the transformation
243  /// from F to F' made the first argument of F into the third argument of F',
244  /// then ArgumentIndexMapping[0] will equal 2.
245  std::vector<unsigned> ArgumentIndexMapping;
246};
247
248/// Given function attributes from a call site for the original function,
249/// return function attributes appropriate for a call to the transformed
250/// function.
251AttributeList TransformFunctionAttributes(
252    const TransformedFunction& TransformedFunction,
253    LLVMContext& Ctx, AttributeList CallSiteAttrs) {
254
255  // Construct a vector of AttributeSet for each function argument.
256  std::vector<llvm::AttributeSet> ArgumentAttributes(
257      TransformedFunction.TransformedType->getNumParams());
258
259  // Copy attributes from the parameter of the original function to the
260  // transformed version.  'ArgumentIndexMapping' holds the mapping from
261  // old argument position to new.
262  for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size();
263       i < ie; ++i) {
264    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i];
265    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i);
266  }
267
268  // Copy annotations on varargs arguments.
269  for (unsigned i = TransformedFunction.OriginalType->getNumParams(),
270       ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) {
271    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i));
272  }
273
274  return AttributeList::get(
275      Ctx,
276      CallSiteAttrs.getFnAttributes(),
277      CallSiteAttrs.getRetAttributes(),
278      llvm::makeArrayRef(ArgumentAttributes));
279}
280
281class DataFlowSanitizer : public ModulePass {
282  friend struct DFSanFunction;
283  friend class DFSanVisitor;
284
285  enum {
286    ShadowWidth = 16
287  };
288
289  /// Which ABI should be used for instrumented functions?
290  enum InstrumentedABI {
291    /// Argument and return value labels are passed through additional
292    /// arguments and by modifying the return type.
293    IA_Args,
294
295    /// Argument and return value labels are passed through TLS variables
296    /// __dfsan_arg_tls and __dfsan_retval_tls.
297    IA_TLS
298  };
299
300  /// How should calls to uninstrumented functions be handled?
301  enum WrapperKind {
302    /// This function is present in an uninstrumented form but we don't know
303    /// how it should be handled.  Print a warning and call the function anyway.
304    /// Don't label the return value.
305    WK_Warning,
306
307    /// This function does not write to (user-accessible) memory, and its return
308    /// value is unlabelled.
309    WK_Discard,
310
311    /// This function does not write to (user-accessible) memory, and the label
312    /// of its return value is the union of the label of its arguments.
313    WK_Functional,
314
315    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
316    /// where F is the name of the function.  This function may wrap the
317    /// original function or provide its own implementation.  This is similar to
318    /// the IA_Args ABI, except that IA_Args uses a struct return type to
319    /// pass the return value shadow in a register, while WK_Custom uses an
320    /// extra pointer argument to return the shadow.  This allows the wrapped
321    /// form of the function type to be expressed in C.
322    WK_Custom
323  };
324
325  Module *Mod;
326  LLVMContext *Ctx;
327  IntegerType *ShadowTy;
328  PointerType *ShadowPtrTy;
329  IntegerType *IntptrTy;
330  ConstantInt *ZeroShadow;
331  ConstantInt *ShadowPtrMask;
332  ConstantInt *ShadowPtrMul;
333  Constant *ArgTLS;
334  Constant *RetvalTLS;
335  void *(*GetArgTLSPtr)();
336  void *(*GetRetvalTLSPtr)();
337  FunctionType *GetArgTLSTy;
338  FunctionType *GetRetvalTLSTy;
339  Constant *GetArgTLS;
340  Constant *GetRetvalTLS;
341  Constant *ExternalShadowMask;
342  FunctionType *DFSanUnionFnTy;
343  FunctionType *DFSanUnionLoadFnTy;
344  FunctionType *DFSanUnimplementedFnTy;
345  FunctionType *DFSanSetLabelFnTy;
346  FunctionType *DFSanNonzeroLabelFnTy;
347  FunctionType *DFSanVarargWrapperFnTy;
348  FunctionCallee DFSanUnionFn;
349  FunctionCallee DFSanCheckedUnionFn;
350  FunctionCallee DFSanUnionLoadFn;
351  FunctionCallee DFSanUnimplementedFn;
352  FunctionCallee DFSanSetLabelFn;
353  FunctionCallee DFSanNonzeroLabelFn;
354  FunctionCallee DFSanVarargWrapperFn;
355  MDNode *ColdCallWeights;
356  DFSanABIList ABIList;
357  DenseMap<Value *, Function *> UnwrappedFnMap;
358  AttrBuilder ReadOnlyNoneAttrs;
359  bool DFSanRuntimeShadowMask = false;
360
361  Value *getShadowAddress(Value *Addr, Instruction *Pos);
362  bool isInstrumented(const Function *F);
363  bool isInstrumented(const GlobalAlias *GA);
364  FunctionType *getArgsFunctionType(FunctionType *T);
365  FunctionType *getTrampolineFunctionType(FunctionType *T);
366  TransformedFunction getCustomFunctionType(FunctionType *T);
367  InstrumentedABI getInstrumentedABI();
368  WrapperKind getWrapperKind(Function *F);
369  void addGlobalNamePrefix(GlobalValue *GV);
370  Function *buildWrapperFunction(Function *F, StringRef NewFName,
371                                 GlobalValue::LinkageTypes NewFLink,
372                                 FunctionType *NewFT);
373  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
374
375public:
376  static char ID;
377
378  DataFlowSanitizer(
379      const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
380      void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
381
382  bool doInitialization(Module &M) override;
383  bool runOnModule(Module &M) override;
384};
385
386struct DFSanFunction {
387  DataFlowSanitizer &DFS;
388  Function *F;
389  DominatorTree DT;
390  DataFlowSanitizer::InstrumentedABI IA;
391  bool IsNativeABI;
392  Value *ArgTLSPtr = nullptr;
393  Value *RetvalTLSPtr = nullptr;
394  AllocaInst *LabelReturnAlloca = nullptr;
395  DenseMap<Value *, Value *> ValShadowMap;
396  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
397  std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;
398  DenseSet<Instruction *> SkipInsts;
399  std::vector<Value *> NonZeroChecks;
400  bool AvoidNewBlocks;
401
402  struct CachedCombinedShadow {
403    BasicBlock *Block;
404    Value *Shadow;
405  };
406  DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow>
407      CachedCombinedShadows;
408  DenseMap<Value *, std::set<Value *>> ShadowElements;
409
410  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
411      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
412    DT.recalculate(*F);
413    // FIXME: Need to track down the register allocator issue which causes poor
414    // performance in pathological cases with large numbers of basic blocks.
415    AvoidNewBlocks = F->size() > 1000;
416  }
417
418  Value *getArgTLSPtr();
419  Value *getArgTLS(unsigned Index, Instruction *Pos);
420  Value *getRetvalTLS();
421  Value *getShadow(Value *V);
422  void setShadow(Instruction *I, Value *Shadow);
423  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
424  Value *combineOperandShadows(Instruction *Inst);
425  Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
426                    Instruction *Pos);
427  void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow,
428                   Instruction *Pos);
429};
430
431class DFSanVisitor : public InstVisitor<DFSanVisitor> {
432public:
433  DFSanFunction &DFSF;
434
435  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
436
437  const DataLayout &getDataLayout() const {
438    return DFSF.F->getParent()->getDataLayout();
439  }
440
441  void visitOperandShadowInst(Instruction &I);
442  void visitUnaryOperator(UnaryOperator &UO);
443  void visitBinaryOperator(BinaryOperator &BO);
444  void visitCastInst(CastInst &CI);
445  void visitCmpInst(CmpInst &CI);
446  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
447  void visitLoadInst(LoadInst &LI);
448  void visitStoreInst(StoreInst &SI);
449  void visitReturnInst(ReturnInst &RI);
450  void visitCallSite(CallSite CS);
451  void visitPHINode(PHINode &PN);
452  void visitExtractElementInst(ExtractElementInst &I);
453  void visitInsertElementInst(InsertElementInst &I);
454  void visitShuffleVectorInst(ShuffleVectorInst &I);
455  void visitExtractValueInst(ExtractValueInst &I);
456  void visitInsertValueInst(InsertValueInst &I);
457  void visitAllocaInst(AllocaInst &I);
458  void visitSelectInst(SelectInst &I);
459  void visitMemSetInst(MemSetInst &I);
460  void visitMemTransferInst(MemTransferInst &I);
461};
462
463} // end anonymous namespace
464
465char DataFlowSanitizer::ID;
466
467INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
468                "DataFlowSanitizer: dynamic data flow analysis.", false, false)
469
470ModulePass *
471llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
472                                  void *(*getArgTLS)(),
473                                  void *(*getRetValTLS)()) {
474  return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
475}
476
477DataFlowSanitizer::DataFlowSanitizer(
478    const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
479    void *(*getRetValTLS)())
480    : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
481  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
482  AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
483                         ClABIListFiles.end());
484  // FIXME: should we propagate vfs::FileSystem to this constructor?
485  ABIList.set(
486      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
487}
488
489FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
490  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
491  ArgTypes.append(T->getNumParams(), ShadowTy);
492  if (T->isVarArg())
493    ArgTypes.push_back(ShadowPtrTy);
494  Type *RetType = T->getReturnType();
495  if (!RetType->isVoidTy())
496    RetType = StructType::get(RetType, ShadowTy);
497  return FunctionType::get(RetType, ArgTypes, T->isVarArg());
498}
499
500FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
501  assert(!T->isVarArg());
502  SmallVector<Type *, 4> ArgTypes;
503  ArgTypes.push_back(T->getPointerTo());
504  ArgTypes.append(T->param_begin(), T->param_end());
505  ArgTypes.append(T->getNumParams(), ShadowTy);
506  Type *RetType = T->getReturnType();
507  if (!RetType->isVoidTy())
508    ArgTypes.push_back(ShadowPtrTy);
509  return FunctionType::get(T->getReturnType(), ArgTypes, false);
510}
511
512TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
513  SmallVector<Type *, 4> ArgTypes;
514
515  // Some parameters of the custom function being constructed are
516  // parameters of T.  Record the mapping from parameters of T to
517  // parameters of the custom function, so that parameter attributes
518  // at call sites can be updated.
519  std::vector<unsigned> ArgumentIndexMapping;
520  for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) {
521    Type* param_type = T->getParamType(i);
522    FunctionType *FT;
523    if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>(
524            cast<PointerType>(param_type)->getElementType()))) {
525      ArgumentIndexMapping.push_back(ArgTypes.size());
526      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
527      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
528    } else {
529      ArgumentIndexMapping.push_back(ArgTypes.size());
530      ArgTypes.push_back(param_type);
531    }
532  }
533  for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
534    ArgTypes.push_back(ShadowTy);
535  if (T->isVarArg())
536    ArgTypes.push_back(ShadowPtrTy);
537  Type *RetType = T->getReturnType();
538  if (!RetType->isVoidTy())
539    ArgTypes.push_back(ShadowPtrTy);
540  return TransformedFunction(
541      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
542      ArgumentIndexMapping);
543}
544
545bool DataFlowSanitizer::doInitialization(Module &M) {
546  Triple TargetTriple(M.getTargetTriple());
547  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
548  bool IsMIPS64 = TargetTriple.isMIPS64();
549  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
550                   TargetTriple.getArch() == Triple::aarch64_be;
551
552  const DataLayout &DL = M.getDataLayout();
553
554  Mod = &M;
555  Ctx = &M.getContext();
556  ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
557  ShadowPtrTy = PointerType::getUnqual(ShadowTy);
558  IntptrTy = DL.getIntPtrType(*Ctx);
559  ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
560  ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
561  if (IsX86_64)
562    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
563  else if (IsMIPS64)
564    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
565  // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
566  else if (IsAArch64)
567    DFSanRuntimeShadowMask = true;
568  else
569    report_fatal_error("unsupported triple");
570
571  Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
572  DFSanUnionFnTy =
573      FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
574  Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
575  DFSanUnionLoadFnTy =
576      FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
577  DFSanUnimplementedFnTy = FunctionType::get(
578      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
579  Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
580  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
581                                        DFSanSetLabelArgs, /*isVarArg=*/false);
582  DFSanNonzeroLabelFnTy = FunctionType::get(
583      Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
584  DFSanVarargWrapperFnTy = FunctionType::get(
585      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
586
587  if (GetArgTLSPtr) {
588    Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
589    ArgTLS = nullptr;
590    GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
591    GetArgTLS = ConstantExpr::getIntToPtr(
592        ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
593        PointerType::getUnqual(GetArgTLSTy));
594  }
595  if (GetRetvalTLSPtr) {
596    RetvalTLS = nullptr;
597    GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
598    GetRetvalTLS = ConstantExpr::getIntToPtr(
599        ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
600        PointerType::getUnqual(GetRetvalTLSTy));
601  }
602
603  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
604  return true;
605}
606
607bool DataFlowSanitizer::isInstrumented(const Function *F) {
608  return !ABIList.isIn(*F, "uninstrumented");
609}
610
611bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
612  return !ABIList.isIn(*GA, "uninstrumented");
613}
614
615DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
616  return ClArgsABI ? IA_Args : IA_TLS;
617}
618
619DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
620  if (ABIList.isIn(*F, "functional"))
621    return WK_Functional;
622  if (ABIList.isIn(*F, "discard"))
623    return WK_Discard;
624  if (ABIList.isIn(*F, "custom"))
625    return WK_Custom;
626
627  return WK_Warning;
628}
629
630void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
631  std::string GVName = GV->getName(), Prefix = "dfs$";
632  GV->setName(Prefix + GVName);
633
634  // Try to change the name of the function in module inline asm.  We only do
635  // this for specific asm directives, currently only ".symver", to try to avoid
636  // corrupting asm which happens to contain the symbol name as a substring.
637  // Note that the substitution for .symver assumes that the versioned symbol
638  // also has an instrumented name.
639  std::string Asm = GV->getParent()->getModuleInlineAsm();
640  std::string SearchStr = ".symver " + GVName + ",";
641  size_t Pos = Asm.find(SearchStr);
642  if (Pos != std::string::npos) {
643    Asm.replace(Pos, SearchStr.size(),
644                ".symver " + Prefix + GVName + "," + Prefix);
645    GV->getParent()->setModuleInlineAsm(Asm);
646  }
647}
648
649Function *
650DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
651                                        GlobalValue::LinkageTypes NewFLink,
652                                        FunctionType *NewFT) {
653  FunctionType *FT = F->getFunctionType();
654  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
655                                    NewFName, F->getParent());
656  NewF->copyAttributesFrom(F);
657  NewF->removeAttributes(
658      AttributeList::ReturnIndex,
659      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
660
661  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
662  if (F->isVarArg()) {
663    NewF->removeAttributes(AttributeList::FunctionIndex,
664                           AttrBuilder().addAttribute("split-stack"));
665    CallInst::Create(DFSanVarargWrapperFn,
666                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
667                     BB);
668    new UnreachableInst(*Ctx, BB);
669  } else {
670    std::vector<Value *> Args;
671    unsigned n = FT->getNumParams();
672    for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
673      Args.push_back(&*ai);
674    CallInst *CI = CallInst::Create(F, Args, "", BB);
675    if (FT->getReturnType()->isVoidTy())
676      ReturnInst::Create(*Ctx, BB);
677    else
678      ReturnInst::Create(*Ctx, CI, BB);
679  }
680
681  return NewF;
682}
683
684Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
685                                                          StringRef FName) {
686  FunctionType *FTT = getTrampolineFunctionType(FT);
687  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
688  Function *F = dyn_cast<Function>(C.getCallee());
689  if (F && F->isDeclaration()) {
690    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
691    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
692    std::vector<Value *> Args;
693    Function::arg_iterator AI = F->arg_begin(); ++AI;
694    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
695      Args.push_back(&*AI);
696    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
697    ReturnInst *RI;
698    if (FT->getReturnType()->isVoidTy())
699      RI = ReturnInst::Create(*Ctx, BB);
700    else
701      RI = ReturnInst::Create(*Ctx, CI, BB);
702
703    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
704    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
705    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
706      DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
707    DFSanVisitor(DFSF).visitCallInst(*CI);
708    if (!FT->getReturnType()->isVoidTy())
709      new StoreInst(DFSF.getShadow(RI->getReturnValue()),
710                    &*std::prev(F->arg_end()), RI);
711  }
712
713  return cast<Constant>(C.getCallee());
714}
715
716bool DataFlowSanitizer::runOnModule(Module &M) {
717  if (ABIList.isIn(M, "skip"))
718    return false;
719
720  if (!GetArgTLSPtr) {
721    Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
722    ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
723    if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS))
724      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
725  }
726  if (!GetRetvalTLSPtr) {
727    RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
728    if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS))
729      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
730  }
731
732  ExternalShadowMask =
733      Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
734
735  {
736    AttributeList AL;
737    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
738                         Attribute::NoUnwind);
739    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
740                         Attribute::ReadNone);
741    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
742                         Attribute::ZExt);
743    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
744    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
745    DFSanUnionFn =
746        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
747  }
748
749  {
750    AttributeList AL;
751    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
752                         Attribute::NoUnwind);
753    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
754                         Attribute::ReadNone);
755    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
756                         Attribute::ZExt);
757    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
758    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
759    DFSanCheckedUnionFn =
760        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
761  }
762  {
763    AttributeList AL;
764    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
765                         Attribute::NoUnwind);
766    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
767                         Attribute::ReadOnly);
768    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
769                         Attribute::ZExt);
770    DFSanUnionLoadFn =
771        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
772  }
773  DFSanUnimplementedFn =
774      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
775  {
776    AttributeList AL;
777    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
778    DFSanSetLabelFn =
779        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
780  }
781  DFSanNonzeroLabelFn =
782      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
783  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
784                                                  DFSanVarargWrapperFnTy);
785
786  std::vector<Function *> FnsToInstrument;
787  SmallPtrSet<Function *, 2> FnsWithNativeABI;
788  for (Function &i : M) {
789    if (!i.isIntrinsic() &&
790        &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
791        &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
792        &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
793        &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
794        &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
795        &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
796        &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
797      FnsToInstrument.push_back(&i);
798  }
799
800  // Give function aliases prefixes when necessary, and build wrappers where the
801  // instrumentedness is inconsistent.
802  for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
803    GlobalAlias *GA = &*i;
804    ++i;
805    // Don't stop on weak.  We assume people aren't playing games with the
806    // instrumentedness of overridden weak aliases.
807    if (auto F = dyn_cast<Function>(GA->getBaseObject())) {
808      bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
809      if (GAInst && FInst) {
810        addGlobalNamePrefix(GA);
811      } else if (GAInst != FInst) {
812        // Non-instrumented alias of an instrumented function, or vice versa.
813        // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
814        // below will take care of instrumenting it.
815        Function *NewF =
816            buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
817        GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
818        NewF->takeName(GA);
819        GA->eraseFromParent();
820        FnsToInstrument.push_back(NewF);
821      }
822    }
823  }
824
825  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
826      .addAttribute(Attribute::ReadNone);
827
828  // First, change the ABI of every function in the module.  ABI-listed
829  // functions keep their original ABI and get a wrapper function.
830  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
831                                         e = FnsToInstrument.end();
832       i != e; ++i) {
833    Function &F = **i;
834    FunctionType *FT = F.getFunctionType();
835
836    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
837                              FT->getReturnType()->isVoidTy());
838
839    if (isInstrumented(&F)) {
840      // Instrumented functions get a 'dfs$' prefix.  This allows us to more
841      // easily identify cases of mismatching ABIs.
842      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
843        FunctionType *NewFT = getArgsFunctionType(FT);
844        Function *NewF = Function::Create(NewFT, F.getLinkage(),
845                                          F.getAddressSpace(), "", &M);
846        NewF->copyAttributesFrom(&F);
847        NewF->removeAttributes(
848            AttributeList::ReturnIndex,
849            AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
850        for (Function::arg_iterator FArg = F.arg_begin(),
851                                    NewFArg = NewF->arg_begin(),
852                                    FArgEnd = F.arg_end();
853             FArg != FArgEnd; ++FArg, ++NewFArg) {
854          FArg->replaceAllUsesWith(&*NewFArg);
855        }
856        NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
857
858        for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
859             UI != UE;) {
860          BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
861          ++UI;
862          if (BA) {
863            BA->replaceAllUsesWith(
864                BlockAddress::get(NewF, BA->getBasicBlock()));
865            delete BA;
866          }
867        }
868        F.replaceAllUsesWith(
869            ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
870        NewF->takeName(&F);
871        F.eraseFromParent();
872        *i = NewF;
873        addGlobalNamePrefix(NewF);
874      } else {
875        addGlobalNamePrefix(&F);
876      }
877    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
878      // Build a wrapper function for F.  The wrapper simply calls F, and is
879      // added to FnsToInstrument so that any instrumentation according to its
880      // WrapperKind is done in the second pass below.
881      FunctionType *NewFT = getInstrumentedABI() == IA_Args
882                                ? getArgsFunctionType(FT)
883                                : FT;
884
885      // If the function being wrapped has local linkage, then preserve the
886      // function's linkage in the wrapper function.
887      GlobalValue::LinkageTypes wrapperLinkage =
888          F.hasLocalLinkage()
889              ? F.getLinkage()
890              : GlobalValue::LinkOnceODRLinkage;
891
892      Function *NewF = buildWrapperFunction(
893          &F, std::string("dfsw$") + std::string(F.getName()),
894          wrapperLinkage, NewFT);
895      if (getInstrumentedABI() == IA_TLS)
896        NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
897
898      Value *WrappedFnCst =
899          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
900      F.replaceAllUsesWith(WrappedFnCst);
901
902      UnwrappedFnMap[WrappedFnCst] = &F;
903      *i = NewF;
904
905      if (!F.isDeclaration()) {
906        // This function is probably defining an interposition of an
907        // uninstrumented function and hence needs to keep the original ABI.
908        // But any functions it may call need to use the instrumented ABI, so
909        // we instrument it in a mode which preserves the original ABI.
910        FnsWithNativeABI.insert(&F);
911
912        // This code needs to rebuild the iterators, as they may be invalidated
913        // by the push_back, taking care that the new range does not include
914        // any functions added by this code.
915        size_t N = i - FnsToInstrument.begin(),
916               Count = e - FnsToInstrument.begin();
917        FnsToInstrument.push_back(&F);
918        i = FnsToInstrument.begin() + N;
919        e = FnsToInstrument.begin() + Count;
920      }
921               // Hopefully, nobody will try to indirectly call a vararg
922               // function... yet.
923    } else if (FT->isVarArg()) {
924      UnwrappedFnMap[&F] = &F;
925      *i = nullptr;
926    }
927  }
928
929  for (Function *i : FnsToInstrument) {
930    if (!i || i->isDeclaration())
931      continue;
932
933    removeUnreachableBlocks(*i);
934
935    DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i));
936
937    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
938    // Build a copy of the list before iterating over it.
939    SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));
940
941    for (BasicBlock *i : BBList) {
942      Instruction *Inst = &i->front();
943      while (true) {
944        // DFSanVisitor may split the current basic block, changing the current
945        // instruction's next pointer and moving the next instruction to the
946        // tail block from which we should continue.
947        Instruction *Next = Inst->getNextNode();
948        // DFSanVisitor may delete Inst, so keep track of whether it was a
949        // terminator.
950        bool IsTerminator = Inst->isTerminator();
951        if (!DFSF.SkipInsts.count(Inst))
952          DFSanVisitor(DFSF).visit(Inst);
953        if (IsTerminator)
954          break;
955        Inst = Next;
956      }
957    }
958
959    // We will not necessarily be able to compute the shadow for every phi node
960    // until we have visited every block.  Therefore, the code that handles phi
961    // nodes adds them to the PHIFixups list so that they can be properly
962    // handled here.
963    for (std::vector<std::pair<PHINode *, PHINode *>>::iterator
964             i = DFSF.PHIFixups.begin(),
965             e = DFSF.PHIFixups.end();
966         i != e; ++i) {
967      for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
968           ++val) {
969        i->second->setIncomingValue(
970            val, DFSF.getShadow(i->first->getIncomingValue(val)));
971      }
972    }
973
974    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
975    // places (i.e. instructions in basic blocks we haven't even begun visiting
976    // yet).  To make our life easier, do this work in a pass after the main
977    // instrumentation.
978    if (ClDebugNonzeroLabels) {
979      for (Value *V : DFSF.NonZeroChecks) {
980        Instruction *Pos;
981        if (Instruction *I = dyn_cast<Instruction>(V))
982          Pos = I->getNextNode();
983        else
984          Pos = &DFSF.F->getEntryBlock().front();
985        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
986          Pos = Pos->getNextNode();
987        IRBuilder<> IRB(Pos);
988        Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
989        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
990            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
991        IRBuilder<> ThenIRB(BI);
992        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
993      }
994    }
995  }
996
997  return false;
998}
999
1000Value *DFSanFunction::getArgTLSPtr() {
1001  if (ArgTLSPtr)
1002    return ArgTLSPtr;
1003  if (DFS.ArgTLS)
1004    return ArgTLSPtr = DFS.ArgTLS;
1005
1006  IRBuilder<> IRB(&F->getEntryBlock().front());
1007  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
1008}
1009
1010Value *DFSanFunction::getRetvalTLS() {
1011  if (RetvalTLSPtr)
1012    return RetvalTLSPtr;
1013  if (DFS.RetvalTLS)
1014    return RetvalTLSPtr = DFS.RetvalTLS;
1015
1016  IRBuilder<> IRB(&F->getEntryBlock().front());
1017  return RetvalTLSPtr =
1018             IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
1019}
1020
1021Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
1022  IRBuilder<> IRB(Pos);
1023  return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
1024                                getArgTLSPtr(), 0, Idx);
1025}
1026
1027Value *DFSanFunction::getShadow(Value *V) {
1028  if (!isa<Argument>(V) && !isa<Instruction>(V))
1029    return DFS.ZeroShadow;
1030  Value *&Shadow = ValShadowMap[V];
1031  if (!Shadow) {
1032    if (Argument *A = dyn_cast<Argument>(V)) {
1033      if (IsNativeABI)
1034        return DFS.ZeroShadow;
1035      switch (IA) {
1036      case DataFlowSanitizer::IA_TLS: {
1037        Value *ArgTLSPtr = getArgTLSPtr();
1038        Instruction *ArgTLSPos =
1039            DFS.ArgTLS ? &*F->getEntryBlock().begin()
1040                       : cast<Instruction>(ArgTLSPtr)->getNextNode();
1041        IRBuilder<> IRB(ArgTLSPos);
1042        Shadow =
1043            IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
1044        break;
1045      }
1046      case DataFlowSanitizer::IA_Args: {
1047        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
1048        Function::arg_iterator i = F->arg_begin();
1049        while (ArgIdx--)
1050          ++i;
1051        Shadow = &*i;
1052        assert(Shadow->getType() == DFS.ShadowTy);
1053        break;
1054      }
1055      }
1056      NonZeroChecks.push_back(Shadow);
1057    } else {
1058      Shadow = DFS.ZeroShadow;
1059    }
1060  }
1061  return Shadow;
1062}
1063
1064void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1065  assert(!ValShadowMap.count(I));
1066  assert(Shadow->getType() == DFS.ShadowTy);
1067  ValShadowMap[I] = Shadow;
1068}
1069
1070Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1071  assert(Addr != RetvalTLS && "Reinstrumenting?");
1072  IRBuilder<> IRB(Pos);
1073  Value *ShadowPtrMaskValue;
1074  if (DFSanRuntimeShadowMask)
1075    ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
1076  else
1077    ShadowPtrMaskValue = ShadowPtrMask;
1078  return IRB.CreateIntToPtr(
1079      IRB.CreateMul(
1080          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
1081                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
1082          ShadowPtrMul),
1083      ShadowPtrTy);
1084}
1085
1086// Generates IR to compute the union of the two given shadows, inserting it
1087// before Pos.  Returns the computed union Value.
1088Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
1089  if (V1 == DFS.ZeroShadow)
1090    return V2;
1091  if (V2 == DFS.ZeroShadow)
1092    return V1;
1093  if (V1 == V2)
1094    return V1;
1095
1096  auto V1Elems = ShadowElements.find(V1);
1097  auto V2Elems = ShadowElements.find(V2);
1098  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1099    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1100                      V2Elems->second.begin(), V2Elems->second.end())) {
1101      return V1;
1102    } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1103                             V1Elems->second.begin(), V1Elems->second.end())) {
1104      return V2;
1105    }
1106  } else if (V1Elems != ShadowElements.end()) {
1107    if (V1Elems->second.count(V2))
1108      return V1;
1109  } else if (V2Elems != ShadowElements.end()) {
1110    if (V2Elems->second.count(V1))
1111      return V2;
1112  }
1113
1114  auto Key = std::make_pair(V1, V2);
1115  if (V1 > V2)
1116    std::swap(Key.first, Key.second);
1117  CachedCombinedShadow &CCS = CachedCombinedShadows[Key];
1118  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
1119    return CCS.Shadow;
1120
1121  IRBuilder<> IRB(Pos);
1122  if (AvoidNewBlocks) {
1123    CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
1124    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1125    Call->addParamAttr(0, Attribute::ZExt);
1126    Call->addParamAttr(1, Attribute::ZExt);
1127
1128    CCS.Block = Pos->getParent();
1129    CCS.Shadow = Call;
1130  } else {
1131    BasicBlock *Head = Pos->getParent();
1132    Value *Ne = IRB.CreateICmpNE(V1, V2);
1133    BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1134        Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
1135    IRBuilder<> ThenIRB(BI);
1136    CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
1137    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1138    Call->addParamAttr(0, Attribute::ZExt);
1139    Call->addParamAttr(1, Attribute::ZExt);
1140
1141    BasicBlock *Tail = BI->getSuccessor(0);
1142    PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
1143    Phi->addIncoming(Call, Call->getParent());
1144    Phi->addIncoming(V1, Head);
1145
1146    CCS.Block = Tail;
1147    CCS.Shadow = Phi;
1148  }
1149
1150  std::set<Value *> UnionElems;
1151  if (V1Elems != ShadowElements.end()) {
1152    UnionElems = V1Elems->second;
1153  } else {
1154    UnionElems.insert(V1);
1155  }
1156  if (V2Elems != ShadowElements.end()) {
1157    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
1158  } else {
1159    UnionElems.insert(V2);
1160  }
1161  ShadowElements[CCS.Shadow] = std::move(UnionElems);
1162
1163  return CCS.Shadow;
1164}
1165
1166// A convenience function which folds the shadows of each of the operands
1167// of the provided instruction Inst, inserting the IR before Inst.  Returns
1168// the computed union Value.
1169Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
1170  if (Inst->getNumOperands() == 0)
1171    return DFS.ZeroShadow;
1172
1173  Value *Shadow = getShadow(Inst->getOperand(0));
1174  for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
1175    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
1176  }
1177  return Shadow;
1178}
1179
1180void DFSanVisitor::visitOperandShadowInst(Instruction &I) {
1181  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
1182  DFSF.setShadow(&I, CombinedShadow);
1183}
1184
1185// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
1186// Addr has alignment Align, and take the union of each of those shadows.
1187Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
1188                                 Instruction *Pos) {
1189  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
1190    const auto i = AllocaShadowMap.find(AI);
1191    if (i != AllocaShadowMap.end()) {
1192      IRBuilder<> IRB(Pos);
1193      return IRB.CreateLoad(DFS.ShadowTy, i->second);
1194    }
1195  }
1196
1197  uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
1198  SmallVector<const Value *, 2> Objs;
1199  GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
1200  bool AllConstants = true;
1201  for (const Value *Obj : Objs) {
1202    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
1203      continue;
1204    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
1205      continue;
1206
1207    AllConstants = false;
1208    break;
1209  }
1210  if (AllConstants)
1211    return DFS.ZeroShadow;
1212
1213  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
1214  switch (Size) {
1215  case 0:
1216    return DFS.ZeroShadow;
1217  case 1: {
1218    LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
1219    LI->setAlignment(MaybeAlign(ShadowAlign));
1220    return LI;
1221  }
1222  case 2: {
1223    IRBuilder<> IRB(Pos);
1224    Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
1225                                       ConstantInt::get(DFS.IntptrTy, 1));
1226    return combineShadows(
1227        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
1228        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
1229  }
1230  }
1231  if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
1232    // Fast path for the common case where each byte has identical shadow: load
1233    // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
1234    // shadow is non-equal.
1235    BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
1236    IRBuilder<> FallbackIRB(FallbackBB);
1237    CallInst *FallbackCall = FallbackIRB.CreateCall(
1238        DFS.DFSanUnionLoadFn,
1239        {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
1240    FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1241
1242    // Compare each of the shadows stored in the loaded 64 bits to each other,
1243    // by computing (WideShadow rotl ShadowWidth) == WideShadow.
1244    IRBuilder<> IRB(Pos);
1245    Value *WideAddr =
1246        IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
1247    Value *WideShadow =
1248        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
1249    Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
1250    Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
1251    Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
1252    Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
1253    Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
1254
1255    BasicBlock *Head = Pos->getParent();
1256    BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
1257
1258    if (DomTreeNode *OldNode = DT.getNode(Head)) {
1259      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
1260
1261      DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
1262      for (auto Child : Children)
1263        DT.changeImmediateDominator(Child, NewNode);
1264    }
1265
1266    // In the following code LastBr will refer to the previous basic block's
1267    // conditional branch instruction, whose true successor is fixed up to point
1268    // to the next block during the loop below or to the tail after the final
1269    // iteration.
1270    BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
1271    ReplaceInstWithInst(Head->getTerminator(), LastBr);
1272    DT.addNewBlock(FallbackBB, Head);
1273
1274    for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
1275         Ofs += 64 / DFS.ShadowWidth) {
1276      BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
1277      DT.addNewBlock(NextBB, LastBr->getParent());
1278      IRBuilder<> NextIRB(NextBB);
1279      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
1280                                   ConstantInt::get(DFS.IntptrTy, 1));
1281      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
1282                                                        WideAddr, ShadowAlign);
1283      ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
1284      LastBr->setSuccessor(0, NextBB);
1285      LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
1286    }
1287
1288    LastBr->setSuccessor(0, Tail);
1289    FallbackIRB.CreateBr(Tail);
1290    PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
1291    Shadow->addIncoming(FallbackCall, FallbackBB);
1292    Shadow->addIncoming(TruncShadow, LastBr->getParent());
1293    return Shadow;
1294  }
1295
1296  IRBuilder<> IRB(Pos);
1297  CallInst *FallbackCall = IRB.CreateCall(
1298      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
1299  FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1300  return FallbackCall;
1301}
1302
1303void DFSanVisitor::visitLoadInst(LoadInst &LI) {
1304  auto &DL = LI.getModule()->getDataLayout();
1305  uint64_t Size = DL.getTypeStoreSize(LI.getType());
1306  if (Size == 0) {
1307    DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
1308    return;
1309  }
1310
1311  uint64_t Align;
1312  if (ClPreserveAlignment) {
1313    Align = LI.getAlignment();
1314    if (Align == 0)
1315      Align = DL.getABITypeAlignment(LI.getType());
1316  } else {
1317    Align = 1;
1318  }
1319  IRBuilder<> IRB(&LI);
1320  Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
1321  if (ClCombinePointerLabelsOnLoad) {
1322    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
1323    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
1324  }
1325  if (Shadow != DFSF.DFS.ZeroShadow)
1326    DFSF.NonZeroChecks.push_back(Shadow);
1327
1328  DFSF.setShadow(&LI, Shadow);
1329}
1330
1331void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
1332                                Value *Shadow, Instruction *Pos) {
1333  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
1334    const auto i = AllocaShadowMap.find(AI);
1335    if (i != AllocaShadowMap.end()) {
1336      IRBuilder<> IRB(Pos);
1337      IRB.CreateStore(Shadow, i->second);
1338      return;
1339    }
1340  }
1341
1342  uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
1343  IRBuilder<> IRB(Pos);
1344  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
1345  if (Shadow == DFS.ZeroShadow) {
1346    IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
1347    Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
1348    Value *ExtShadowAddr =
1349        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
1350    IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
1351    return;
1352  }
1353
1354  const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
1355  uint64_t Offset = 0;
1356  if (Size >= ShadowVecSize) {
1357    VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
1358    Value *ShadowVec = UndefValue::get(ShadowVecTy);
1359    for (unsigned i = 0; i != ShadowVecSize; ++i) {
1360      ShadowVec = IRB.CreateInsertElement(
1361          ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
1362    }
1363    Value *ShadowVecAddr =
1364        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
1365    do {
1366      Value *CurShadowVecAddr =
1367          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
1368      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
1369      Size -= ShadowVecSize;
1370      ++Offset;
1371    } while (Size >= ShadowVecSize);
1372    Offset *= ShadowVecSize;
1373  }
1374  while (Size > 0) {
1375    Value *CurShadowAddr =
1376        IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
1377    IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
1378    --Size;
1379    ++Offset;
1380  }
1381}
1382
1383void DFSanVisitor::visitStoreInst(StoreInst &SI) {
1384  auto &DL = SI.getModule()->getDataLayout();
1385  uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
1386  if (Size == 0)
1387    return;
1388
1389  uint64_t Align;
1390  if (ClPreserveAlignment) {
1391    Align = SI.getAlignment();
1392    if (Align == 0)
1393      Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
1394  } else {
1395    Align = 1;
1396  }
1397
1398  Value* Shadow = DFSF.getShadow(SI.getValueOperand());
1399  if (ClCombinePointerLabelsOnStore) {
1400    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
1401    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
1402  }
1403  DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
1404}
1405
1406void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
1407  visitOperandShadowInst(UO);
1408}
1409
1410void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
1411  visitOperandShadowInst(BO);
1412}
1413
1414void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
1415
1416void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }
1417
1418void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1419  visitOperandShadowInst(GEPI);
1420}
1421
1422void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
1423  visitOperandShadowInst(I);
1424}
1425
1426void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
1427  visitOperandShadowInst(I);
1428}
1429
1430void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
1431  visitOperandShadowInst(I);
1432}
1433
1434void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
1435  visitOperandShadowInst(I);
1436}
1437
1438void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
1439  visitOperandShadowInst(I);
1440}
1441
1442void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
1443  bool AllLoadsStores = true;
1444  for (User *U : I.users()) {
1445    if (isa<LoadInst>(U))
1446      continue;
1447
1448    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1449      if (SI->getPointerOperand() == &I)
1450        continue;
1451    }
1452
1453    AllLoadsStores = false;
1454    break;
1455  }
1456  if (AllLoadsStores) {
1457    IRBuilder<> IRB(&I);
1458    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
1459  }
1460  DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
1461}
1462
1463void DFSanVisitor::visitSelectInst(SelectInst &I) {
1464  Value *CondShadow = DFSF.getShadow(I.getCondition());
1465  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
1466  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
1467
1468  if (isa<VectorType>(I.getCondition()->getType())) {
1469    DFSF.setShadow(
1470        &I,
1471        DFSF.combineShadows(
1472            CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
1473  } else {
1474    Value *ShadowSel;
1475    if (TrueShadow == FalseShadow) {
1476      ShadowSel = TrueShadow;
1477    } else {
1478      ShadowSel =
1479          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
1480    }
1481    DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
1482  }
1483}
1484
1485void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
1486  IRBuilder<> IRB(&I);
1487  Value *ValShadow = DFSF.getShadow(I.getValue());
1488  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
1489                 {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
1490                                                                *DFSF.DFS.Ctx)),
1491                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
1492}
1493
1494void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
1495  IRBuilder<> IRB(&I);
1496  Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
1497  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
1498  Value *LenShadow = IRB.CreateMul(
1499      I.getLength(),
1500      ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
1501  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
1502  DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
1503  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
1504  auto *MTI = cast<MemTransferInst>(
1505      IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
1506                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
1507  if (ClPreserveAlignment) {
1508    MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
1509    MTI->setSourceAlignment(I.getSourceAlignment() * (DFSF.DFS.ShadowWidth / 8));
1510  } else {
1511    MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8);
1512    MTI->setSourceAlignment(DFSF.DFS.ShadowWidth / 8);
1513  }
1514}
1515
1516void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
1517  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
1518    switch (DFSF.IA) {
1519    case DataFlowSanitizer::IA_TLS: {
1520      Value *S = DFSF.getShadow(RI.getReturnValue());
1521      IRBuilder<> IRB(&RI);
1522      IRB.CreateStore(S, DFSF.getRetvalTLS());
1523      break;
1524    }
1525    case DataFlowSanitizer::IA_Args: {
1526      IRBuilder<> IRB(&RI);
1527      Type *RT = DFSF.F->getFunctionType()->getReturnType();
1528      Value *InsVal =
1529          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
1530      Value *InsShadow =
1531          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
1532      RI.setOperand(0, InsShadow);
1533      break;
1534    }
1535    }
1536  }
1537}
1538
1539void DFSanVisitor::visitCallSite(CallSite CS) {
1540  Function *F = CS.getCalledFunction();
1541  if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
1542    visitOperandShadowInst(*CS.getInstruction());
1543    return;
1544  }
1545
1546  // Calls to this function are synthesized in wrappers, and we shouldn't
1547  // instrument them.
1548  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
1549    return;
1550
1551  IRBuilder<> IRB(CS.getInstruction());
1552
1553  DenseMap<Value *, Function *>::iterator i =
1554      DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
1555  if (i != DFSF.DFS.UnwrappedFnMap.end()) {
1556    Function *F = i->second;
1557    switch (DFSF.DFS.getWrapperKind(F)) {
1558    case DataFlowSanitizer::WK_Warning:
1559      CS.setCalledFunction(F);
1560      IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
1561                     IRB.CreateGlobalStringPtr(F->getName()));
1562      DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
1563      return;
1564    case DataFlowSanitizer::WK_Discard:
1565      CS.setCalledFunction(F);
1566      DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
1567      return;
1568    case DataFlowSanitizer::WK_Functional:
1569      CS.setCalledFunction(F);
1570      visitOperandShadowInst(*CS.getInstruction());
1571      return;
1572    case DataFlowSanitizer::WK_Custom:
1573      // Don't try to handle invokes of custom functions, it's too complicated.
1574      // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
1575      // wrapper.
1576      if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
1577        FunctionType *FT = F->getFunctionType();
1578        TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
1579        std::string CustomFName = "__dfsw_";
1580        CustomFName += F->getName();
1581        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
1582            CustomFName, CustomFn.TransformedType);
1583        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
1584          CustomFn->copyAttributesFrom(F);
1585
1586          // Custom functions returning non-void will write to the return label.
1587          if (!FT->getReturnType()->isVoidTy()) {
1588            CustomFn->removeAttributes(AttributeList::FunctionIndex,
1589                                       DFSF.DFS.ReadOnlyNoneAttrs);
1590          }
1591        }
1592
1593        std::vector<Value *> Args;
1594
1595        CallSite::arg_iterator i = CS.arg_begin();
1596        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
1597          Type *T = (*i)->getType();
1598          FunctionType *ParamFT;
1599          if (isa<PointerType>(T) &&
1600              (ParamFT = dyn_cast<FunctionType>(
1601                   cast<PointerType>(T)->getElementType()))) {
1602            std::string TName = "dfst";
1603            TName += utostr(FT->getNumParams() - n);
1604            TName += "$";
1605            TName += F->getName();
1606            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
1607            Args.push_back(T);
1608            Args.push_back(
1609                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
1610          } else {
1611            Args.push_back(*i);
1612          }
1613        }
1614
1615        i = CS.arg_begin();
1616        const unsigned ShadowArgStart = Args.size();
1617        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
1618          Args.push_back(DFSF.getShadow(*i));
1619
1620        if (FT->isVarArg()) {
1621          auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
1622                                           CS.arg_size() - FT->getNumParams());
1623          auto *LabelVAAlloca = new AllocaInst(
1624              LabelVATy, getDataLayout().getAllocaAddrSpace(),
1625              "labelva", &DFSF.F->getEntryBlock().front());
1626
1627          for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
1628            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
1629            IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
1630          }
1631
1632          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
1633        }
1634
1635        if (!FT->getReturnType()->isVoidTy()) {
1636          if (!DFSF.LabelReturnAlloca) {
1637            DFSF.LabelReturnAlloca =
1638              new AllocaInst(DFSF.DFS.ShadowTy,
1639                             getDataLayout().getAllocaAddrSpace(),
1640                             "labelreturn", &DFSF.F->getEntryBlock().front());
1641          }
1642          Args.push_back(DFSF.LabelReturnAlloca);
1643        }
1644
1645        for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i)
1646          Args.push_back(*i);
1647
1648        CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
1649        CustomCI->setCallingConv(CI->getCallingConv());
1650        CustomCI->setAttributes(TransformFunctionAttributes(CustomFn,
1651            CI->getContext(), CI->getAttributes()));
1652
1653        // Update the parameter attributes of the custom call instruction to
1654        // zero extend the shadow parameters. This is required for targets
1655        // which consider ShadowTy an illegal type.
1656        for (unsigned n = 0; n < FT->getNumParams(); n++) {
1657          const unsigned ArgNo = ShadowArgStart + n;
1658          if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
1659            CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
1660        }
1661
1662        if (!FT->getReturnType()->isVoidTy()) {
1663          LoadInst *LabelLoad =
1664              IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
1665          DFSF.setShadow(CustomCI, LabelLoad);
1666        }
1667
1668        CI->replaceAllUsesWith(CustomCI);
1669        CI->eraseFromParent();
1670        return;
1671      }
1672      break;
1673    }
1674  }
1675
1676  FunctionType *FT = cast<FunctionType>(
1677      CS.getCalledValue()->getType()->getPointerElementType());
1678  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
1679    for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
1680      IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
1681                      DFSF.getArgTLS(i, CS.getInstruction()));
1682    }
1683  }
1684
1685  Instruction *Next = nullptr;
1686  if (!CS.getType()->isVoidTy()) {
1687    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
1688      if (II->getNormalDest()->getSinglePredecessor()) {
1689        Next = &II->getNormalDest()->front();
1690      } else {
1691        BasicBlock *NewBB =
1692            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
1693        Next = &NewBB->front();
1694      }
1695    } else {
1696      assert(CS->getIterator() != CS->getParent()->end());
1697      Next = CS->getNextNode();
1698    }
1699
1700    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
1701      IRBuilder<> NextIRB(Next);
1702      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
1703      DFSF.SkipInsts.insert(LI);
1704      DFSF.setShadow(CS.getInstruction(), LI);
1705      DFSF.NonZeroChecks.push_back(LI);
1706    }
1707  }
1708
1709  // Do all instrumentation for IA_Args down here to defer tampering with the
1710  // CFG in a way that SplitEdge may be able to detect.
1711  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
1712    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
1713    Value *Func =
1714        IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
1715    std::vector<Value *> Args;
1716
1717    CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1718    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
1719      Args.push_back(*i);
1720
1721    i = CS.arg_begin();
1722    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
1723      Args.push_back(DFSF.getShadow(*i));
1724
1725    if (FT->isVarArg()) {
1726      unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
1727      ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
1728      AllocaInst *VarArgShadow =
1729        new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
1730                       "", &DFSF.F->getEntryBlock().front());
1731      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
1732      for (unsigned n = 0; i != e; ++i, ++n) {
1733        IRB.CreateStore(
1734            DFSF.getShadow(*i),
1735            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
1736        Args.push_back(*i);
1737      }
1738    }
1739
1740    CallSite NewCS;
1741    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
1742      NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
1743                               II->getUnwindDest(), Args);
1744    } else {
1745      NewCS = IRB.CreateCall(NewFT, Func, Args);
1746    }
1747    NewCS.setCallingConv(CS.getCallingConv());
1748    NewCS.setAttributes(CS.getAttributes().removeAttributes(
1749        *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
1750        AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));
1751
1752    if (Next) {
1753      ExtractValueInst *ExVal =
1754          ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
1755      DFSF.SkipInsts.insert(ExVal);
1756      ExtractValueInst *ExShadow =
1757          ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
1758      DFSF.SkipInsts.insert(ExShadow);
1759      DFSF.setShadow(ExVal, ExShadow);
1760      DFSF.NonZeroChecks.push_back(ExShadow);
1761
1762      CS.getInstruction()->replaceAllUsesWith(ExVal);
1763    }
1764
1765    CS.getInstruction()->eraseFromParent();
1766  }
1767}
1768
1769void DFSanVisitor::visitPHINode(PHINode &PN) {
1770  PHINode *ShadowPN =
1771      PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
1772
1773  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
1774  Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
1775  for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
1776       ++i) {
1777    ShadowPN->addIncoming(UndefShadow, *i);
1778  }
1779
1780  DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
1781  DFSF.setShadow(&PN, ShadowPN);
1782}
1783