NVPTXLowerAggrCopies.cpp revision 360784
1//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
11// the size is large or is not a compile-time constant.
12//
13//===----------------------------------------------------------------------===//
14
15#include "NVPTXLowerAggrCopies.h"
16#include "llvm/Analysis/TargetTransformInfo.h"
17#include "llvm/CodeGen/StackProtector.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/DataLayout.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
23#include "llvm/IR/IntrinsicInst.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/IR/Module.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Transforms/Utils/BasicBlockUtils.h"
29#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
30
31#define DEBUG_TYPE "nvptx"
32
33using namespace llvm;
34
35namespace {
36
37// actual analysis class, which is a functionpass
38struct NVPTXLowerAggrCopies : public FunctionPass {
39  static char ID;
40
41  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
42
43  void getAnalysisUsage(AnalysisUsage &AU) const override {
44    AU.addPreserved<StackProtector>();
45    AU.addRequired<TargetTransformInfoWrapperPass>();
46  }
47
48  bool runOnFunction(Function &F) override;
49
50  static const unsigned MaxAggrCopySize = 128;
51
52  StringRef getPassName() const override {
53    return "Lower aggregate copies/intrinsics into loops";
54  }
55};
56
57char NVPTXLowerAggrCopies::ID = 0;
58
59bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
60  SmallVector<LoadInst *, 4> AggrLoads;
61  SmallVector<MemIntrinsic *, 4> MemCalls;
62
63  const DataLayout &DL = F.getParent()->getDataLayout();
64  LLVMContext &Context = F.getParent()->getContext();
65  const TargetTransformInfo &TTI =
66      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
67
68  // Collect all aggregate loads and mem* calls.
69  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
70    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
71         ++II) {
72      if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
73        if (!LI->hasOneUse())
74          continue;
75
76        if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
77          continue;
78
79        if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
80          if (SI->getOperand(0) != LI)
81            continue;
82          AggrLoads.push_back(LI);
83        }
84      } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
85        // Convert intrinsic calls with variable size or with constant size
86        // larger than the MaxAggrCopySize threshold.
87        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
88          if (LenCI->getZExtValue() >= MaxAggrCopySize) {
89            MemCalls.push_back(IntrCall);
90          }
91        } else {
92          MemCalls.push_back(IntrCall);
93        }
94      }
95    }
96  }
97
98  if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
99    return false;
100  }
101
102  //
103  // Do the transformation of an aggr load/copy/set to a loop
104  //
105  for (LoadInst *LI : AggrLoads) {
106    auto *SI = cast<StoreInst>(*LI->user_begin());
107    Value *SrcAddr = LI->getOperand(0);
108    Value *DstAddr = SI->getOperand(1);
109    unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
110    ConstantInt *CopyLen =
111        ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
112
113    createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
114                              /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
115                              /* CopyLen */ CopyLen,
116                              /* SrcAlign */ LI->getAlignment(),
117                              /* DestAlign */ SI->getAlignment(),
118                              /* SrcIsVolatile */ LI->isVolatile(),
119                              /* DstIsVolatile */ SI->isVolatile(), TTI);
120
121    SI->eraseFromParent();
122    LI->eraseFromParent();
123  }
124
125  // Transform mem* intrinsic calls.
126  for (MemIntrinsic *MemCall : MemCalls) {
127    if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
128      expandMemCpyAsLoop(Memcpy, TTI);
129    } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
130      expandMemMoveAsLoop(Memmove);
131    } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
132      expandMemSetAsLoop(Memset);
133    }
134    MemCall->eraseFromParent();
135  }
136
137  return true;
138}
139
140} // namespace
141
142namespace llvm {
143void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
144}
145
146INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
147                "Lower aggregate copies, and llvm.mem* intrinsics into loops",
148                false, false)
149
150FunctionPass *llvm::createLowerAggrCopies() {
151  return new NVPTXLowerAggrCopies();
152}
153