1//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
// The pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
11//
12// $old = ...
13// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15// $res = VALU $dpp_value [, src1]
16//
17// to
18//
19// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21//
22// Combining rules :
23//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    ($bound_ctrl==DPP_BOUND_ZERO or $old==0)
26// -> $combined_old = undef,
27//    $combined_bound_ctrl = DPP_BOUND_ZERO
28//
29// if the VALU op is binary and
30//    $bound_ctrl==DPP_BOUND_OFF and
31//    $old==identity value (immediate) for the VALU op
32// -> $combined_old = src1,
33//    $combined_bound_ctrl = DPP_BOUND_OFF
34//
35// Otherwise cancel.
36//
37// The mov_dpp instruction should reside in the same BB as all its uses
38//===----------------------------------------------------------------------===//
39
40#include "AMDGPU.h"
41#include "AMDGPUSubtarget.h"
42#include "SIInstrInfo.h"
43#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44#include "llvm/ADT/DenseMap.h"
45#include "llvm/ADT/SmallVector.h"
46#include "llvm/ADT/Statistic.h"
47#include "llvm/CodeGen/MachineBasicBlock.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineFunctionPass.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineOperand.h"
53#include "llvm/CodeGen/MachineRegisterInfo.h"
54#include "llvm/CodeGen/TargetRegisterInfo.h"
55#include "llvm/Pass.h"
56#include <cassert>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "gcn-dpp-combine"
61
62STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
63
64namespace {
65
66class GCNDPPCombine : public MachineFunctionPass {
67  MachineRegisterInfo *MRI;
68  const SIInstrInfo *TII;
69
70  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
71
72  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
73
74  MachineInstr *createDPPInst(MachineInstr &OrigMI,
75                              MachineInstr &MovMI,
76                              RegSubRegPair CombOldVGPR,
77                              MachineOperand *OldOpnd,
78                              bool CombBCZ) const;
79
80  MachineInstr *createDPPInst(MachineInstr &OrigMI,
81                              MachineInstr &MovMI,
82                              RegSubRegPair CombOldVGPR,
83                              bool CombBCZ) const;
84
85  bool hasNoImmOrEqual(MachineInstr &MI,
86                       unsigned OpndName,
87                       int64_t Value,
88                       int64_t Mask = -1) const;
89
90  bool combineDPPMov(MachineInstr &MI) const;
91
92public:
93  static char ID;
94
95  GCNDPPCombine() : MachineFunctionPass(ID) {
96    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
97  }
98
99  bool runOnMachineFunction(MachineFunction &MF) override;
100
101  StringRef getPassName() const override { return "GCN DPP Combine"; }
102
103  void getAnalysisUsage(AnalysisUsage &AU) const override {
104    AU.setPreservesCFG();
105    MachineFunctionPass::getAnalysisUsage(AU);
106  }
107
108private:
109  int getDPPOp(unsigned Op) const;
110};
111
112} // end anonymous namespace
113
114INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
115
116char GCNDPPCombine::ID = 0;
117
118char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
119
120FunctionPass *llvm::createGCNDPPCombinePass() {
121  return new GCNDPPCombine();
122}
123
124int GCNDPPCombine::getDPPOp(unsigned Op) const {
125  auto DPP32 = AMDGPU::getDPPOp32(Op);
126  if (DPP32 == -1) {
127    auto E32 = AMDGPU::getVOPe32(Op);
128    DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
129  }
130  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
131}
132
133// tracks the register operand definition and returns:
134//   1. immediate operand used to initialize the register if found
135//   2. nullptr if the register operand is undef
136//   3. the operand itself otherwise
137MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
138  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
139  if (!Def)
140    return nullptr;
141
142  switch(Def->getOpcode()) {
143  default: break;
144  case AMDGPU::IMPLICIT_DEF:
145    return nullptr;
146  case AMDGPU::COPY:
147  case AMDGPU::V_MOV_B32_e32: {
148    auto &Op1 = Def->getOperand(1);
149    if (Op1.isImm())
150      return &Op1;
151    break;
152  }
153  }
154  return &OldOpnd;
155}
156
157MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
158                                           MachineInstr &MovMI,
159                                           RegSubRegPair CombOldVGPR,
160                                           bool CombBCZ) const {
161  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
162
163  auto OrigOp = OrigMI.getOpcode();
164  auto DPPOp = getDPPOp(OrigOp);
165  if (DPPOp == -1) {
166    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
167    return nullptr;
168  }
169
170  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
171                         OrigMI.getDebugLoc(), TII->get(DPPOp));
172  bool Fail = false;
173  do {
174    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
175    assert(Dst);
176    DPPInst.add(*Dst);
177    int NumOperands = 1;
178
179    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
180    if (OldIdx != -1) {
181      assert(OldIdx == NumOperands);
182      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
183      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
184      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
185                     CombOldVGPR.SubReg);
186      ++NumOperands;
187    } else {
188      // TODO: this discards MAC/FMA instructions for now, let's add it later
189      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
190                           " TBD\n");
191      Fail = true;
192      break;
193    }
194
195    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
196                                          AMDGPU::OpName::src0_modifiers)) {
197      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
198                                          AMDGPU::OpName::src0_modifiers));
199      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
200      DPPInst.addImm(Mod0->getImm());
201      ++NumOperands;
202    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
203                   AMDGPU::OpName::src0_modifiers) != -1) {
204      DPPInst.addImm(0);
205      ++NumOperands;
206    }
207    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
208    assert(Src0);
209    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
210      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
211      Fail = true;
212      break;
213    }
214    DPPInst.add(*Src0);
215    DPPInst->getOperand(NumOperands).setIsKill(false);
216    ++NumOperands;
217
218    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
219                                          AMDGPU::OpName::src1_modifiers)) {
220      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
221                                          AMDGPU::OpName::src1_modifiers));
222      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
223      DPPInst.addImm(Mod1->getImm());
224      ++NumOperands;
225    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
226                   AMDGPU::OpName::src1_modifiers) != -1) {
227      DPPInst.addImm(0);
228      ++NumOperands;
229    }
230    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
231      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
232        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
233        Fail = true;
234        break;
235      }
236      DPPInst.add(*Src1);
237      ++NumOperands;
238    }
239
240    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
241      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
242          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
243        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
244        Fail = true;
245        break;
246      }
247      DPPInst.add(*Src2);
248    }
249
250    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
251    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
252    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
253    DPPInst.addImm(CombBCZ ? 1 : 0);
254  } while (false);
255
256  if (Fail) {
257    DPPInst.getInstr()->eraseFromParent();
258    return nullptr;
259  }
260  LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
261  return DPPInst.getInstr();
262}
263
264static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
265  assert(OldOpnd->isImm());
266  switch (OrigMIOp) {
267  default: break;
268  case AMDGPU::V_ADD_U32_e32:
269  case AMDGPU::V_ADD_U32_e64:
270  case AMDGPU::V_ADD_I32_e32:
271  case AMDGPU::V_ADD_I32_e64:
272  case AMDGPU::V_OR_B32_e32:
273  case AMDGPU::V_OR_B32_e64:
274  case AMDGPU::V_SUBREV_U32_e32:
275  case AMDGPU::V_SUBREV_U32_e64:
276  case AMDGPU::V_SUBREV_I32_e32:
277  case AMDGPU::V_SUBREV_I32_e64:
278  case AMDGPU::V_MAX_U32_e32:
279  case AMDGPU::V_MAX_U32_e64:
280  case AMDGPU::V_XOR_B32_e32:
281  case AMDGPU::V_XOR_B32_e64:
282    if (OldOpnd->getImm() == 0)
283      return true;
284    break;
285  case AMDGPU::V_AND_B32_e32:
286  case AMDGPU::V_AND_B32_e64:
287  case AMDGPU::V_MIN_U32_e32:
288  case AMDGPU::V_MIN_U32_e64:
289    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
290        std::numeric_limits<uint32_t>::max())
291      return true;
292    break;
293  case AMDGPU::V_MIN_I32_e32:
294  case AMDGPU::V_MIN_I32_e64:
295    if (static_cast<int32_t>(OldOpnd->getImm()) ==
296        std::numeric_limits<int32_t>::max())
297      return true;
298    break;
299  case AMDGPU::V_MAX_I32_e32:
300  case AMDGPU::V_MAX_I32_e64:
301    if (static_cast<int32_t>(OldOpnd->getImm()) ==
302        std::numeric_limits<int32_t>::min())
303      return true;
304    break;
305  case AMDGPU::V_MUL_I32_I24_e32:
306  case AMDGPU::V_MUL_I32_I24_e64:
307  case AMDGPU::V_MUL_U32_U24_e32:
308  case AMDGPU::V_MUL_U32_U24_e64:
309    if (OldOpnd->getImm() == 1)
310      return true;
311    break;
312  }
313  return false;
314}
315
316MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
317                                           MachineInstr &MovMI,
318                                           RegSubRegPair CombOldVGPR,
319                                           MachineOperand *OldOpndValue,
320                                           bool CombBCZ) const {
321  assert(CombOldVGPR.Reg);
322  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
323    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
324    if (!Src1 || !Src1->isReg()) {
325      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
326      return nullptr;
327    }
328    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
329      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
330      return nullptr;
331    }
332    CombOldVGPR = getRegSubRegPair(*Src1);
333    if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
334      LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
335      return nullptr;
336    }
337  }
338  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
339}
340
341// returns true if MI doesn't have OpndName immediate operand or the
342// operand has Value
343bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
344                                    int64_t Value, int64_t Mask) const {
345  auto *Imm = TII->getNamedOperand(MI, OpndName);
346  if (!Imm)
347    return true;
348
349  assert(Imm->isImm());
350  return (Imm->getImm() & Mask) == Value;
351}
352
353bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
354  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
355  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
356
357  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
358  assert(DstOpnd && DstOpnd->isReg());
359  auto DPPMovReg = DstOpnd->getReg();
360  if (DPPMovReg.isPhysical()) {
361    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
362    return false;
363  }
364  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
365    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
366                         " for all uses\n");
367    return false;
368  }
369
370  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
371  assert(RowMaskOpnd && RowMaskOpnd->isImm());
372  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
373  assert(BankMaskOpnd && BankMaskOpnd->isImm());
374  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
375                            BankMaskOpnd->getImm() == 0xF;
376
377  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
378  assert(BCZOpnd && BCZOpnd->isImm());
379  bool BoundCtrlZero = BCZOpnd->getImm();
380
381  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
382  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
383  assert(OldOpnd && OldOpnd->isReg());
384  assert(SrcOpnd && SrcOpnd->isReg());
385  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
386    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
387    return false;
388  }
389
390  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
391  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
392  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
393  // but the third option is used to distinguish undef from non-immediate
394  // to reuse IMPLICIT_DEF instruction later
395  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
396
397  bool CombBCZ = false;
398
399  if (MaskAllLanes && BoundCtrlZero) { // [1]
400    CombBCZ = true;
401  } else {
402    if (!OldOpndValue || !OldOpndValue->isImm()) {
403      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
404      return false;
405    }
406
407    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
408      LLVM_DEBUG(dbgs() <<
409        "  failed: old reg def and mov should be in the same BB\n");
410      return false;
411    }
412
413    if (OldOpndValue->getImm() == 0) {
414      if (MaskAllLanes) {
415        assert(!BoundCtrlZero); // by check [1]
416        CombBCZ = true;
417      }
418    } else if (BoundCtrlZero) {
419      assert(!MaskAllLanes); // by check [1]
420      LLVM_DEBUG(dbgs() <<
421        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
422      return false;
423    }
424  }
425
426  LLVM_DEBUG(dbgs() << "  old=";
427    if (!OldOpndValue)
428      dbgs() << "undef";
429    else
430      dbgs() << *OldOpndValue;
431    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
432
433  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
434  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
435  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
436  // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
437  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
438    CombOldVGPR = RegSubRegPair(
439      MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
440    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
441                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
442    DPPMIs.push_back(UndefInst.getInstr());
443  }
444
445  OrigMIs.push_back(&MovMI);
446  bool Rollback = true;
447  SmallVector<MachineOperand*, 16> Uses;
448
449  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
450    Uses.push_back(&Use);
451  }
452
453  while (!Uses.empty()) {
454    MachineOperand *Use = Uses.pop_back_val();
455    Rollback = true;
456
457    auto &OrigMI = *Use->getParent();
458    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
459
460    auto OrigOp = OrigMI.getOpcode();
461    if (OrigOp == AMDGPU::REG_SEQUENCE) {
462      Register FwdReg = OrigMI.getOperand(0).getReg();
463      unsigned FwdSubReg = 0;
464
465      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
466        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
467                             " for all uses\n");
468        break;
469      }
470
471      unsigned OpNo, E = OrigMI.getNumOperands();
472      for (OpNo = 1; OpNo < E; OpNo += 2) {
473        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
474          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
475          break;
476        }
477      }
478
479      if (!FwdSubReg)
480        break;
481
482      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
483        if (Op.getSubReg() == FwdSubReg)
484          Uses.push_back(&Op);
485      }
486      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
487      continue;
488    }
489
490    if (TII->isVOP3(OrigOp)) {
491      if (!TII->hasVALU32BitEncoding(OrigOp)) {
492        LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
493        break;
494      }
495      // check if other than abs|neg modifiers are set (opsel for example)
496      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
497      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
498          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
499          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
500          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
501        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
502        break;
503      }
504    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
505      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
506      break;
507    }
508
509    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
510    if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
511      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
512                                        OldOpndValue, CombBCZ)) {
513        DPPMIs.push_back(DPPInst);
514        Rollback = false;
515      }
516    } else if (OrigMI.isCommutable() &&
517               Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
518      auto *BB = OrigMI.getParent();
519      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
520      BB->insert(OrigMI, NewMI);
521      if (TII->commuteInstruction(*NewMI)) {
522        LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
523        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
524                                          OldOpndValue, CombBCZ)) {
525          DPPMIs.push_back(DPPInst);
526          Rollback = false;
527        }
528      } else
529        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
530      NewMI->eraseFromParent();
531    } else
532      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
533    if (Rollback)
534      break;
535    OrigMIs.push_back(&OrigMI);
536  }
537
538  Rollback |= !Uses.empty();
539
540  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
541    MI->eraseFromParent();
542
543  if (!Rollback) {
544    for (auto &S : RegSeqWithOpNos) {
545      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
546        S.first->eraseFromParent();
547        continue;
548      }
549      while (!S.second.empty())
550        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
551    }
552  }
553
554  return !Rollback;
555}
556
557bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
558  auto &ST = MF.getSubtarget<GCNSubtarget>();
559  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
560    return false;
561
562  MRI = &MF.getRegInfo();
563  TII = ST.getInstrInfo();
564
565  assert(MRI->isSSA() && "Must be run on SSA");
566
567  bool Changed = false;
568  for (auto &MBB : MF) {
569    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
570      auto &MI = *I++;
571      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
572        Changed = true;
573        ++NumDPPMovsCombined;
574      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
575        auto Split = TII->expandMovDPP64(MI);
576        for (auto M : { Split.first, Split.second }) {
577          if (combineDPPMov(*M))
578            ++NumDPPMovsCombined;
579        }
580        Changed = true;
581      }
582    }
583  }
584  return Changed;
585}
586