PPCQPXLoadSplat.cpp revision 360784
1//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The QPX vector registers overlay the scalar floating-point registers, and
10// any scalar floating-point loads splat their value across all vector lanes.
11// Thus, if we have a scalar load followed by a splat, we can remove the splat
12// (i.e. replace the load with a load-and-splat pseudo instruction).
13//
14// This pass must run after anything that might do store-to-load forwarding.
15//
16//===----------------------------------------------------------------------===//
17
18#include "PPC.h"
19#include "PPCInstrBuilder.h"
20#include "PPCInstrInfo.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/TargetSubtargetInfo.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Target/TargetMachine.h"
27using namespace llvm;
28
29#define DEBUG_TYPE "ppc-qpx-load-splat"
30
31STATISTIC(NumSimplified, "Number of QPX load splats simplified");
32
33namespace {
34  struct PPCQPXLoadSplat : public MachineFunctionPass {
35    static char ID;
36    PPCQPXLoadSplat() : MachineFunctionPass(ID) {
37      initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
38    }
39
40    bool runOnMachineFunction(MachineFunction &Fn) override;
41
42    StringRef getPassName() const override {
43      return "PowerPC QPX Load Splat Simplification";
44    }
45  };
46  char PPCQPXLoadSplat::ID = 0;
47}
48
49INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
50                "PowerPC QPX Load Splat Simplification",
51                false, false)
52
53FunctionPass *llvm::createPPCQPXLoadSplatPass() {
54  return new PPCQPXLoadSplat();
55}
56
57bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
58  if (skipFunction(MF.getFunction()))
59    return false;
60
61  bool MadeChange = false;
62  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
63
64  for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
65    MachineBasicBlock *MBB = &*MFI;
66    SmallVector<MachineInstr *, 4> Splats;
67
68    for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
69      MachineInstr *MI = &*MBBI;
70
71      if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
72        Splats.clear();
73        continue;
74      }
75
76      // We're looking for a sequence like this:
77      // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
78      // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
79
80      for (auto SI = Splats.begin(); SI != Splats.end();) {
81        MachineInstr *SMI = *SI;
82        Register SplatReg = SMI->getOperand(0).getReg();
83        Register SrcReg = SMI->getOperand(1).getReg();
84
85        if (MI->modifiesRegister(SrcReg, TRI)) {
86          switch (MI->getOpcode()) {
87          default:
88            SI = Splats.erase(SI);
89            continue;
90          case PPC::LFS:
91          case PPC::LFD:
92          case PPC::LFSU:
93          case PPC::LFDU:
94          case PPC::LFSUX:
95          case PPC::LFDUX:
96          case PPC::LFSX:
97          case PPC::LFDX:
98          case PPC::LFIWAX:
99          case PPC::LFIWZX:
100            if (SplatReg != SrcReg) {
101              // We need to change the load to define the scalar subregister of
102              // the QPX splat source register.
103              unsigned SubRegIndex =
104                TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
105              Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
106
107              // Substitute both the explicit defined register, and also the
108              // implicit def of the containing QPX register.
109              MI->getOperand(0).setReg(SplatSubReg);
110              MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
111            }
112
113            SI = Splats.erase(SI);
114
115            // If SMI is directly after MI, then MBBI's base iterator is
116            // pointing at SMI.  Adjust MBBI around the call to erase SMI to
117            // avoid invalidating MBBI.
118            ++MBBI;
119            SMI->eraseFromParent();
120            --MBBI;
121
122            ++NumSimplified;
123            MadeChange = true;
124            continue;
125          }
126        }
127
128        // If this instruction defines the splat register, then we cannot move
129        // the previous definition above it. If it reads from the splat
130        // register, then it must already be alive from some previous
131        // definition, and if the splat register is different from the source
132        // register, then this definition must not be the load for which we're
133        // searching.
134        if (MI->modifiesRegister(SplatReg, TRI) ||
135            (SrcReg != SplatReg &&
136             MI->readsRegister(SplatReg, TRI))) {
137          SI = Splats.erase(SI);
138          continue;
139        }
140
141        ++SI;
142      }
143
144      if (MI->getOpcode() != PPC::QVESPLATI &&
145          MI->getOpcode() != PPC::QVESPLATIs &&
146          MI->getOpcode() != PPC::QVESPLATIb)
147        continue;
148      if (MI->getOperand(2).getImm() != 0)
149        continue;
150
151      // If there are other uses of the scalar value after this, replacing
152      // those uses might be non-trivial.
153      if (!MI->getOperand(1).isKill())
154        continue;
155
156      Splats.push_back(MI);
157    }
158  }
159
160  return MadeChange;
161}
162