//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments, then the loads from them. If
/// a loaded value is itself a pointer and the loaded pointer is not modified in
/// the kernel before the load, the loaded pointer is promoted to the global
/// address space. The process then continues recursively.
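///
/// For illustration, a minimal sketch of the rewrite with a hypothetical flat
/// pointer %p: the pass inserts a cast to the global address space and a cast
/// back to flat, then redirects the other uses of %p through the back-cast:
///
///   %p.global = addrspacecast ptr %p to ptr addrspace(1)
///   %p.flat = addrspacecast ptr addrspace(1) %p.global to ptr
///
/// The Infer Address Spaces pass is then expected to fold the casts and
/// rewrite the users to address space 1 (global) directly.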
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"

using namespace llvm;

namespace {

class AMDGPUPromoteKernelArguments : public FunctionPass {
  MemorySSA *MSSA;

  AliasAnalysis *AA;

  Instruction *ArgCastInsertPt;

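  // Worklist of promotion candidates: qualifying pointer arguments and
  // pointers loaded from them.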
  SmallVector<Value *> Ptrs;

  void enqueueUsers(Value *Ptr);

  bool promotePointer(Value *Ptr);

  bool promoteLoad(LoadInst *LI);

public:
  static char ID;

  AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}

  bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
};

} // end anonymous namespace

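// Walk the users of Ptr, looking through in-bounds GEPs, address space casts
// and bitcasts, and record loads through it that are not clobbered earlier in
// the kernel as new promotion candidates.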
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> PtrUsers(Ptr->users());

  while (!PtrUsers.empty()) {
    Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
    if (!U)
      continue;

    switch (U->getOpcode()) {
    default:
      break;
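    // A load whose address is Ptr (possibly offset in-bounds): if the loaded
    // memory is not clobbered before the load, the loaded value becomes a new
    // promotion candidate.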
    case Instruction::Load: {
      LoadInst *LD = cast<LoadInst>(U);
      if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
          !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
        Ptrs.push_back(LD);

      break;
    }
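    // These operations preserve the underlying pointer, so continue walking
    // their users.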
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
    case Instruction::BitCast:
      if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
        PtrUsers.append(U->user_begin(), U->user_end());
      break;
    }
  }
}

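// Promote a single worklist entry: tag a candidate load with the
// amdgpu.noclobber metadata, enqueue users of a promotable pointer, and, for a
// flat pointer, insert the addrspacecast round-trip through the global address
// space.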
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  bool Changed = false;

  LoadInst *LI = dyn_cast<LoadInst>(Ptr);
  if (LI)
    Changed |= promoteLoad(LI);

  PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
  if (!PT)
    return Changed;

  if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
      PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    enqueueUsers(Ptr);

  if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return Changed;

  IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
                   : ArgCastInsertPt);

  // Cast the pointer to the global address space and back to flat, and let
  // the Infer Address Spaces pass do all the necessary rewriting.
  PointerType *NewPT =
      PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
  Value *Cast =
      B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
  Value *CastBack =
      B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
  Ptr->replaceUsesWithIf(CastBack,
                         [Cast](Use &U) { return U.getUser() != Cast; });

  return true;
}

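// Mark a simple load with amdgpu.noclobber metadata. The absence of clobbers
// was already established in enqueueUsers; the metadata records that fact for
// later stages of the compiler.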
bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
  if (!LI->isSimple())
    return false;

  LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
  return true;
}

// Skip any static allocas at the start of the entry block when choosing the
// insertion point for the argument casts.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
  for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);

    // If this is a dynamic alloca, the value may depend on the loaded kernargs,
    // so loads will need to be inserted before it.
    if (!AI || !AI->isStaticAlloca())
      break;
  }

  return InsPt;
}

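// Seed the worklist with the kernel's promotable pointer arguments and process
// candidates until the worklist is empty.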
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
                                       AliasAnalysis &AA) {
  if (skipFunction(F))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;
  this->AA = &AA;

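  // Only pointer arguments in the flat, global and constant address spaces
  // are promotion candidates.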
  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    PointerType *PT = dyn_cast<PointerType>(Arg.getType());
    if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
                PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
      continue;

    Ptrs.push_back(&Arg);
  }

  bool Changed = false;
  while (!Ptrs.empty()) {
    Value *Ptr = Ptrs.pop_back_val();
    Changed |= promotePointer(Ptr);
  }

  return Changed;
}

bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return run(F, MSSA, AA);
}

INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                      "AMDGPU Promote Kernel Arguments", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
                    "AMDGPU Promote Kernel Arguments", false, false)

char AMDGPUPromoteKernelArguments::ID = 0;

FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  return new AMDGPUPromoteKernelArguments();
}

PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<MemorySSAAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}