NVPTXPeephole.cpp revision 360784
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// In NVPTX, NVPTXFrameLowering will emit the following instructions at the
// beginning of a MachineFunction.
11//
12//   mov %SPL, %depot
13//   cvta.local %SP, %SPL
14//
15// Because Frame Index is a generic address and alloca can only return generic
16// pointer, without this pass the instructions producing alloca'ed address will
17// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
18// this address with their .local versions, but this may introduce a lot of
19// cvta.to.local instructions. Performance can be improved if we avoid casting
20// address back and forth and directly calculate local address based on %SPL.
// This peephole pass optimizes these cases. For example, it will transform the
// following pattern
24//    %0 = LEA_ADDRi64 %VRFrame, 4
25//    %1 = cvta_to_local_yes_64 %0
26//
27// into
28//    %1 = LEA_ADDRi64 %VRFrameLocal, 4
29//
30// %VRFrameLocal is the virtual register name of %SPL
31//
32//===----------------------------------------------------------------------===//
33
34#include "NVPTX.h"
35#include "llvm/CodeGen/MachineFunctionPass.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetInstrInfo.h"
39#include "llvm/CodeGen/TargetRegisterInfo.h"
40
41using namespace llvm;
42
43#define DEBUG_TYPE "nvptx-peephole"
44
45namespace llvm {
46void initializeNVPTXPeepholePass(PassRegistry &);
47}
48
49namespace {
50struct NVPTXPeephole : public MachineFunctionPass {
51 public:
52  static char ID;
53  NVPTXPeephole() : MachineFunctionPass(ID) {
54    initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
55  }
56
57  bool runOnMachineFunction(MachineFunction &MF) override;
58
59  StringRef getPassName() const override {
60    return "NVPTX optimize redundant cvta.to.local instruction";
61  }
62
63  void getAnalysisUsage(AnalysisUsage &AU) const override {
64    MachineFunctionPass::getAnalysisUsage(AU);
65  }
66};
67}
68
69char NVPTXPeephole::ID = 0;
70
71INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
72
73static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
74  auto &MBB = *Root.getParent();
75  auto &MF = *MBB.getParent();
76  // Check current instruction is cvta.to.local
77  if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
78      Root.getOpcode() != NVPTX::cvta_to_local_yes)
79    return false;
80
81  auto &Op = Root.getOperand(1);
82  const auto &MRI = MF.getRegInfo();
83  MachineInstr *GenericAddrDef = nullptr;
84  if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
85    GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
86  }
87
88  // Check the register operand is uniquely defined by LEA_ADDRi instruction
89  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
90      (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
91       GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
92    return false;
93  }
94
95  // Check the LEA_ADDRi operand is Frame index
96  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
97  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
98    return true;
99  }
100
101  return false;
102}
103
104static void CombineCVTAToLocal(MachineInstr &Root) {
105  auto &MBB = *Root.getParent();
106  auto &MF = *MBB.getParent();
107  const auto &MRI = MF.getRegInfo();
108  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
109  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
110
111  MachineInstrBuilder MIB =
112      BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
113              Root.getOperand(0).getReg())
114          .addReg(NVPTX::VRFrameLocal)
115          .add(Prev.getOperand(2));
116
117  MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
118
119  // Check if MRI has only one non dbg use, which is Root
120  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
121    Prev.eraseFromParentAndMarkDBGValuesForRemoval();
122  }
123  Root.eraseFromParentAndMarkDBGValuesForRemoval();
124}
125
126bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
127  if (skipFunction(MF.getFunction()))
128    return false;
129
130  bool Changed = false;
131  // Loop over all of the basic blocks.
132  for (auto &MBB : MF) {
133    // Traverse the basic block.
134    auto BlockIter = MBB.begin();
135
136    while (BlockIter != MBB.end()) {
137      auto &MI = *BlockIter++;
138      if (isCVTAToLocalCombinationCandidate(MI)) {
139        CombineCVTAToLocal(MI);
140        Changed = true;
141      }
142    }  // Instruction
143  }    // Basic Block
144
145  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
146  const auto &MRI = MF.getRegInfo();
147  if (MRI.use_empty(NVPTX::VRFrame)) {
148    if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
149      MI->eraseFromParentAndMarkDBGValuesForRemoval();
150    }
151  }
152
153  return Changed;
154}
155
156MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
157