MIRCanonicalizerPass.cpp revision 360784
1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/CodeGen/MachineFunctionPass.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/CodeGen/Passes.h"
33#include "llvm/InitializePasses.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/raw_ostream.h"
36
37#include <queue>
38
39using namespace llvm;
40
41namespace llvm {
42extern char &MIRCanonicalizerID;
43} // namespace llvm
44
45#define DEBUG_TYPE "mir-canonicalizer"
46
47static cl::opt<unsigned>
48    CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
49                               cl::value_desc("N"),
50                               cl::desc("Function number to canonicalize."));
51
52namespace {
53
54class MIRCanonicalizer : public MachineFunctionPass {
55public:
56  static char ID;
57  MIRCanonicalizer() : MachineFunctionPass(ID) {}
58
59  StringRef getPassName() const override {
60    return "Rename register operands in a canonical ordering.";
61  }
62
63  void getAnalysisUsage(AnalysisUsage &AU) const override {
64    AU.setPreservesCFG();
65    MachineFunctionPass::getAnalysisUsage(AU);
66  }
67
68  bool runOnMachineFunction(MachineFunction &MF) override;
69};
70
71} // end anonymous namespace
72
73char MIRCanonicalizer::ID;
74
75char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
76
77INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
78                      "Rename Register Operands Canonically", false, false)
79
80INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
81                    "Rename Register Operands Canonically", false, false)
82
83static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
84  if (MF.empty())
85    return {};
86  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
87  std::vector<MachineBasicBlock *> RPOList;
88  for (auto MBB : RPOT) {
89    RPOList.push_back(MBB);
90  }
91
92  return RPOList;
93}
94
95static bool
96rescheduleLexographically(std::vector<MachineInstr *> instructions,
97                          MachineBasicBlock *MBB,
98                          std::function<MachineBasicBlock::iterator()> getPos) {
99
100  bool Changed = false;
101  using StringInstrPair = std::pair<std::string, MachineInstr *>;
102  std::vector<StringInstrPair> StringInstrMap;
103
104  for (auto *II : instructions) {
105    std::string S;
106    raw_string_ostream OS(S);
107    II->print(OS);
108    OS.flush();
109
110    // Trim the assignment, or start from the begining in the case of a store.
111    const size_t i = S.find("=");
112    StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
113  }
114
115  llvm::sort(StringInstrMap,
116             [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
117               return (a.first < b.first);
118             });
119
120  for (auto &II : StringInstrMap) {
121
122    LLVM_DEBUG({
123      dbgs() << "Splicing ";
124      II.second->dump();
125      dbgs() << " right before: ";
126      getPos()->dump();
127    });
128
129    Changed = true;
130    MBB->splice(getPos(), MBB, II.second);
131  }
132
133  return Changed;
134}
135
136static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
137                                  MachineBasicBlock *MBB) {
138
139  bool Changed = false;
140
141  // Calculates the distance of MI from the begining of its parent BB.
142  auto getInstrIdx = [](const MachineInstr &MI) {
143    unsigned i = 0;
144    for (auto &CurMI : *MI.getParent()) {
145      if (&CurMI == &MI)
146        return i;
147      i++;
148    }
149    return ~0U;
150  };
151
152  // Pre-Populate vector of instructions to reschedule so that we don't
153  // clobber the iterator.
154  std::vector<MachineInstr *> Instructions;
155  for (auto &MI : *MBB) {
156    Instructions.push_back(&MI);
157  }
158
159  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
160  std::map<unsigned, MachineInstr *> MultiUserLookup;
161  unsigned UseToBringDefCloserToCount = 0;
162  std::vector<MachineInstr *> PseudoIdempotentInstructions;
163  std::vector<unsigned> PhysRegDefs;
164  for (auto *II : Instructions) {
165    for (unsigned i = 1; i < II->getNumOperands(); i++) {
166      MachineOperand &MO = II->getOperand(i);
167      if (!MO.isReg())
168        continue;
169
170      if (Register::isVirtualRegister(MO.getReg()))
171        continue;
172
173      if (!MO.isDef())
174        continue;
175
176      PhysRegDefs.push_back(MO.getReg());
177    }
178  }
179
180  for (auto *II : Instructions) {
181    if (II->getNumOperands() == 0)
182      continue;
183    if (II->mayLoadOrStore())
184      continue;
185
186    MachineOperand &MO = II->getOperand(0);
187    if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
188      continue;
189    if (!MO.isDef())
190      continue;
191
192    bool IsPseudoIdempotent = true;
193    for (unsigned i = 1; i < II->getNumOperands(); i++) {
194
195      if (II->getOperand(i).isImm()) {
196        continue;
197      }
198
199      if (II->getOperand(i).isReg()) {
200        if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
201          if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
202              PhysRegDefs.end()) {
203            continue;
204          }
205      }
206
207      IsPseudoIdempotent = false;
208      break;
209    }
210
211    if (IsPseudoIdempotent) {
212      PseudoIdempotentInstructions.push_back(II);
213      continue;
214    }
215
216    LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
217
218    MachineInstr *Def = II;
219    unsigned Distance = ~0U;
220    MachineInstr *UseToBringDefCloserTo = nullptr;
221    MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
222    for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
223      MachineInstr *UseInst = UO.getParent();
224
225      const unsigned DefLoc = getInstrIdx(*Def);
226      const unsigned UseLoc = getInstrIdx(*UseInst);
227      const unsigned Delta = (UseLoc - DefLoc);
228
229      if (UseInst->getParent() != Def->getParent())
230        continue;
231      if (DefLoc >= UseLoc)
232        continue;
233
234      if (Delta < Distance) {
235        Distance = Delta;
236        UseToBringDefCloserTo = UseInst;
237        MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
238      }
239    }
240
241    const auto BBE = MBB->instr_end();
242    MachineBasicBlock::iterator DefI = BBE;
243    MachineBasicBlock::iterator UseI = BBE;
244
245    for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
246
247      if (DefI != BBE && UseI != BBE)
248        break;
249
250      if (&*BBI == Def) {
251        DefI = BBI;
252        continue;
253      }
254
255      if (&*BBI == UseToBringDefCloserTo) {
256        UseI = BBI;
257        continue;
258      }
259    }
260
261    if (DefI == BBE || UseI == BBE)
262      continue;
263
264    LLVM_DEBUG({
265      dbgs() << "Splicing ";
266      DefI->dump();
267      dbgs() << " right before: ";
268      UseI->dump();
269    });
270
271    MultiUsers[UseToBringDefCloserTo].push_back(Def);
272    Changed = true;
273    MBB->splice(UseI, MBB, DefI);
274  }
275
276  // Sort the defs for users of multiple defs lexographically.
277  for (const auto &E : MultiUserLookup) {
278
279    auto UseI =
280        std::find_if(MBB->instr_begin(), MBB->instr_end(),
281                     [&](MachineInstr &MI) -> bool { return &MI == E.second; });
282
283    if (UseI == MBB->instr_end())
284      continue;
285
286    LLVM_DEBUG(
287        dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
288    Changed |= rescheduleLexographically(
289        MultiUsers[E.second], MBB,
290        [&]() -> MachineBasicBlock::iterator { return UseI; });
291  }
292
293  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
294  LLVM_DEBUG(
295      dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
296  Changed |= rescheduleLexographically(
297      PseudoIdempotentInstructions, MBB,
298      [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
299
300  return Changed;
301}
302
303static bool propagateLocalCopies(MachineBasicBlock *MBB) {
304  bool Changed = false;
305  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
306
307  std::vector<MachineInstr *> Copies;
308  for (MachineInstr &MI : MBB->instrs()) {
309    if (MI.isCopy())
310      Copies.push_back(&MI);
311  }
312
313  for (MachineInstr *MI : Copies) {
314
315    if (!MI->getOperand(0).isReg())
316      continue;
317    if (!MI->getOperand(1).isReg())
318      continue;
319
320    const Register Dst = MI->getOperand(0).getReg();
321    const Register Src = MI->getOperand(1).getReg();
322
323    if (!Register::isVirtualRegister(Dst))
324      continue;
325    if (!Register::isVirtualRegister(Src))
326      continue;
327    // Not folding COPY instructions if regbankselect has not set the RCs.
328    // Why are we only considering Register Classes? Because the verifier
329    // sometimes gets upset if the register classes don't match even if the
330    // types do. A future patch might add COPY folding for matching types in
331    // pre-registerbankselect code.
332    if (!MRI.getRegClassOrNull(Dst))
333      continue;
334    if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
335      continue;
336
337    std::vector<MachineOperand *> Uses;
338    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
339      Uses.push_back(&*UI);
340    for (auto *MO : Uses)
341      MO->setReg(Src);
342
343    Changed = true;
344    MI->eraseFromParent();
345  }
346
347  return Changed;
348}
349
350static bool doDefKillClear(MachineBasicBlock *MBB) {
351  bool Changed = false;
352
353  for (auto &MI : *MBB) {
354    for (auto &MO : MI.operands()) {
355      if (!MO.isReg())
356        continue;
357      if (!MO.isDef() && MO.isKill()) {
358        Changed = true;
359        MO.setIsKill(false);
360      }
361
362      if (MO.isDef() && MO.isDead()) {
363        Changed = true;
364        MO.setIsDead(false);
365      }
366    }
367  }
368
369  return Changed;
370}
371
372static bool runOnBasicBlock(MachineBasicBlock *MBB,
373                            unsigned BasicBlockNum, VRegRenamer &Renamer) {
374  LLVM_DEBUG({
375    dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
376    dbgs() << "\n\n================================================\n\n";
377  });
378
379  bool Changed = false;
380
381  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
382
383  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
384             MBB->dump(););
385  Changed |= propagateLocalCopies(MBB);
386  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
387
388  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
389  unsigned IdempotentInstCount = 0;
390  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
391  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
392
393  Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
394
395  // TODO: Consider dropping this. Dropping kill defs is probably not
396  // semantically sound.
397  Changed |= doDefKillClear(MBB);
398
399  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
400             dbgs() << "\n";);
401  LLVM_DEBUG(
402      dbgs() << "\n\n================================================\n\n");
403  return Changed;
404}
405
406bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
407
408  static unsigned functionNum = 0;
409  if (CanonicalizeFunctionNumber != ~0U) {
410    if (CanonicalizeFunctionNumber != functionNum++)
411      return false;
412    LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
413                      << "\n";);
414  }
415
416  // we need a valid vreg to create a vreg type for skipping all those
417  // stray vreg numbers so reach alignment/canonical vreg values.
418  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
419
420  LLVM_DEBUG(
421      dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
422      dbgs() << "\n\n================================================\n\n";
423      dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
424      for (auto MBB
425           : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
426      << "\n\n================================================\n\n";);
427
428  unsigned BBNum = 0;
429  bool Changed = false;
430  MachineRegisterInfo &MRI = MF.getRegInfo();
431  VRegRenamer Renamer(MRI);
432  for (auto MBB : RPOList)
433    Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
434
435  return Changed;
436}
437