AArch64InstrInfo.cpp revision 360784
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                          AArch64::CATCHRET),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy.  This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  {
    auto Op = MI.getOpcode();
    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  }

  // Meta-instructions emit no code.
  if (MI.isMetaInstruction())
    return 0;

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  case AArch64::JumpTableDest32:
  case AArch64::JumpTableDest16:
  case AArch64::JumpTableDest8:
    NumBytes = 12;
    break;
  case AArch64::SPACE:
    NumBytes = MI.getOperand(1).getImm();
    break;
  }

  return NumBytes;
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
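
// The Cond vector filled in by parseCondBranch encodes three branch
// flavours (consumed by instantiateCondBranch and insertSelect below):
//   Bcc:     {CondCode}
//   CB(N)Z:  {-1, Opcode, Reg}
//   TB(N)Z:  {-1, Opcode, Reg, BitNumber}
// For example, "tbnz w0, #3, %bb.1" is encoded as {-1, TBNZW, w0, 3}.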

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}
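
// Branch offsets are encoded in units of 4-byte instruction words, hence
// the BrOffset / 4 above. With the default displacement bits this allows:
//   TB(N)Z: 14 bits, roughly +/-32 KiB of byte offset
//   CB(N)Z and Bcc: 19 bits, roughly +/-1 MiB
//   B: reported as 64 bits, i.e. effectively unlimited for this check.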

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBNZW:
  case AArch64::TBZX:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
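
// By way of example, analyzeBranch recognizes block endings such as:
//   b.eq %bb.1               -> TBB = %bb.1, Cond = {EQ}, fall through
//   cbz x0, %bb.1 ; b %bb.2  -> TBB = %bb.1, FBB = %bb.2,
//                               Cond = {-1, CBZX, x0}
// and returns true ("cannot analyze") for anything ending in an indirect
// branch.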

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
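
// For example, reverseBranchCondition turns {EQ} into {NE}, and
// {-1, TBZX, x0, 63} into {-1, TBNZX, x0, 63}; the register and bit-number
// operands are left untouched.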

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use MachineInstrBuilder::add() rather than addReg() so
    // that the operand's register flags are preserved.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (Register::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!Register::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
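
// For example, if %1 is defined by "%1 = ADDWri %0, 1, 0" (i.e. %0 + 1),
// canFoldIntoCSel returns CSINCWr and sets *NewVReg = %0, letting the
// caller emit "csinc dst, other, %0, cc" instead of a separate add plus
// csel. Likewise ORN from the zero register folds to csinv, and SUB from
// the zero register folds to csneg.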

bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
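
// For instance, Cond = {-1, TBZW, %w0, 3} with a GPR32 destination expands
// to a test of the selected bit followed by a select on the flags:
//   ands wzr, %w0, #0x8            ; tst %w0, #(1 << 3)
//   csel dst, true, false, eq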

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
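
// For example, 0x00ff00ff00ff00ff is a repeating bit pattern and hence a
// valid logical immediate, so a MOVi64imm of it can become a single ORRXri
// from XZR, whereas an arbitrary constant such as 0x123456789abcdef0 is
// not encodable and still needs a MOVZ/MOVK sequence.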

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  const unsigned Opcode = MI.getOpcode();

  // Firstly, check cases gated by features.

  if (Subtarget.hasZeroCycleZeroingFP()) {
    if (Opcode == AArch64::FMOVH0 ||
        Opcode == AArch64::FMOVS0 ||
        Opcode == AArch64::FMOVD0)
      return true;
  }

  if (Subtarget.hasZeroCycleZeroingGP()) {
    if (Opcode == TargetOpcode::COPY &&
        (MI.getOperand(1).getReg() == AArch64::WZR ||
         MI.getOperand(1).getReg() == AArch64::XZR))
      return true;
  }

  // Secondly, check cases specific to sub-targets.

  if (Subtarget.hasExynosCheapAsMoveHandling()) {
    if (isExynosCheapAsMove(MI))
      return true;

    return MI.isAsCheapAsAMove();
  }

  // Finally, check generic cases.

  switch (Opcode) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
    default:
      return false;
    case AArch64::SEH_StackAlloc:
    case AArch64::SEH_SaveFPLR:
    case AArch64::SEH_SaveFPLR_X:
    case AArch64::SEH_SaveReg:
    case AArch64::SEH_SaveReg_X:
    case AArch64::SEH_SaveRegP:
    case AArch64::SEH_SaveRegP_X:
    case AArch64::SEH_SaveFReg:
    case AArch64::SEH_SaveFReg_X:
    case AArch64::SEH_SaveFRegP:
    case AArch64::SEH_SaveFRegP_X:
    case AArch64::SEH_SetFP:
    case AArch64::SEH_AddFP:
    case AArch64::SEH_Nop:
    case AArch64::SEH_PrologEnd:
    case AArch64::SEH_EpilogStart:
    case AArch64::SEH_EpilogEnd:
      return true;
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
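
// For example, "sxtw x0, w1" is SBFMXri x0, x1, 0, 31: a pure 32 -> 64 bit
// sign extension, so the register coalescer can treat the low half of x0
// as a rename of w1 through the sub_32 subregister index.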

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base, the offset from the base, and the width. Width is
  // the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the bases are identical, and the offset of the lower memory access
  // plus its width does not overlap the offset of the higher memory
  // access, then the memory accesses are disjoint.
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
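
// For example, "str x1, [x0]" (offset 0, width 8) and "str w2, [x0, #8]"
// (offset 8, width 4) share the base register x0 and satisfy
// 0 + 8 <= 8, so the two stores are reported as trivially disjoint.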

bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;
  switch (MI.getOpcode()) {
  case AArch64::HINT:
    // CSDB hints are scheduling barriers.
    if (MI.getOperand(0).getImm() == 0x14)
      return true;
    break;
  case AArch64::DSB:
  case AArch64::ISB:
    // DSB and ISB also are scheduling barriers.
    return true;
  default:;
  }
  return isSEHInstruction(MI);
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: Collapse CmpValue to 0 or 1; optimizeCompareInstr only ever
    // compares it against zero.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int; converting uint64_t to int would
    // lose the high 32 bits (this caused a bug in spec2006-483.xalancbmk).
    // CmpValue is only used to compare against zero in optimizeCompareInstr,
    // so collapse it to 0 or 1 here.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    Register Reg = MO.getReg();
    if (Register::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// Return the opcode that does not set flags when possible; otherwise
/// return the original opcode. The caller is responsible for doing the
/// actual substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
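
// The MIDefinesZeroReg special case above matters because, in the ri/rs
// forms, the flag-setting instructions encode register 31 as WZR/XZR while
// the non-flag-setting forms encode it as WSP/SP; a compare whose result
// goes to the zero register must therefore keep its S opcode.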

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the
/// condition flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is a true compare
/// instruction only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional (non-flag-setting) version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have an "ri" form whose immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 by analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a true compare instruction only if its destination register
  // is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its own opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in one.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr
///        or if MI opcode is not the S form there must be neither defs of flags
///        nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
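
// A typical substitution performed by substituteCmpToZero:
//   %2 = SUBWrr %0, %1
//   ...
//   SUBSWri %2, 0, 0           ; cmp %2, #0
//   Bcc NE, %bb.1
// becomes
//   %2 = SUBSWrr %0, %1        ; the sub now sets NZCV itself
//   Bcc NE, %bb.1
// This is only legal because canInstrSubstituteCmpInstr rejects any later
// use of the C or V flags, which the subtraction may set differently from
// the explicit compare against zero.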

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
      MI.getOpcode() != AArch64::CATCHRET)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  DebugLoc DL = MI.getDebugLoc();

  if (MI.getOpcode() == AArch64::CATCHRET) {
    // Skip to the first instruction before the epilog.
    const TargetInstrInfo *TII =
      MBB.getParent()->getSubtarget().getInstrInfo();
    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
    auto MBBI = MachineBasicBlock::iterator(MI);
    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
    while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
           FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::prev(FirstEpilogSEH);
    if (FirstEpilogSEH != MBB.begin())
      FirstEpilogSEH = std::next(FirstEpilogSEH);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
        .addReg(AArch64::X0, RegState::Define)
        .addMBB(TargetMBB);
    BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
        .addReg(AArch64::X0, RegState::Define)
        .addReg(AArch64::X0)
        .addMBB(TargetMBB)
        .addImm(0);
    return true;
  }

  Register Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(0)
          .addMemOperand(*MI.memoperands_begin());
    }
  } else if (TM.getCodeModel() == CodeModel::Large) {
    assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Tiny) {
    BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
        .addGlobalAddress(GV, 0, OpFlags);
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    if (Subtarget.isTargetILP32()) {
      unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
      BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
          .addDef(Reg32, RegState::Dead)
          .addUse(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin())
          .addDef(Reg, RegState::Implicit);
    } else {
      BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
          .addReg(Reg, RegState::Kill)
          .addGlobalAddress(GV, 0, LoFlags)
          .addMemOperand(*MI.memoperands_begin());
    }
  }

  MBB.erase(MI);

  return true;
}
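
// As a rough sketch (the exact guard symbol and flags depend on the
// target), a LOAD_STACK_GUARD of a non-GOT global in the small code model
// expands above into the usual page/pageoff pair:
//   adrp x0, guard_symbol                ; ADRP with MO_PAGE
//   ldr  x0, [x0, :lo12:guard_symbol]    ; LDRXui with MO_PAGEOFF | MO_NC
// while the large code model instead materializes the address with one
// MOVZ and three MOVKs before the final load, as built above.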

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
1606    // GPR32/GPR64 copies will be lowered to ORRWrs/ORRXrs
1607    Register DstReg = MI.getOperand(0).getReg();
1608    return (AArch64::GPR32RegClass.contains(DstReg) ||
1609            AArch64::GPR64RegClass.contains(DstReg));
1610  }
1611  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1612    if (MI.getOperand(1).getReg() == AArch64::XZR) {
1613      assert(MI.getDesc().getNumOperands() == 4 &&
1614             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1615      return true;
1616    }
1617    break;
1618  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1619    if (MI.getOperand(2).getImm() == 0) {
1620      assert(MI.getDesc().getNumOperands() == 4 &&
1621             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1622      return true;
1623    }
1624    break;
1625  }
1626  return false;
1627}
1628
1629// Return true if this instruction simply renames a floating-point register
1630// without modifying bits.
1631bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1632  switch (MI.getOpcode()) {
1633  default:
1634    break;
1635  case TargetOpcode::COPY: {
1636    // FPR64/FPR128 copies will be lowered to ORR.16b
1637    Register DstReg = MI.getOperand(0).getReg();
1638    return (AArch64::FPR64RegClass.contains(DstReg) ||
1639            AArch64::FPR128RegClass.contains(DstReg));
1640  }
1641  case AArch64::ORRv16i8:
1642    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1643      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1644             "invalid ORRv16i8 operands");
1645      return true;
1646    }
1647    break;
1648  }
1649  return false;
1650}
1651
1652unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1653                                               int &FrameIndex) const {
1654  switch (MI.getOpcode()) {
1655  default:
1656    break;
1657  case AArch64::LDRWui:
1658  case AArch64::LDRXui:
1659  case AArch64::LDRBui:
1660  case AArch64::LDRHui:
1661  case AArch64::LDRSui:
1662  case AArch64::LDRDui:
1663  case AArch64::LDRQui:
1664    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1665        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1666      FrameIndex = MI.getOperand(1).getIndex();
1667      return MI.getOperand(0).getReg();
1668    }
1669    break;
1670  }
1671
1672  return 0;
1673}
1674
1675unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1676                                              int &FrameIndex) const {
1677  switch (MI.getOpcode()) {
1678  default:
1679    break;
1680  case AArch64::STRWui:
1681  case AArch64::STRXui:
1682  case AArch64::STRBui:
1683  case AArch64::STRHui:
1684  case AArch64::STRSui:
1685  case AArch64::STRDui:
1686  case AArch64::STRQui:
1687    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1688        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1689      FrameIndex = MI.getOperand(1).getIndex();
1690      return MI.getOperand(0).getReg();
1691    }
1692    break;
1693  }
1694  return 0;
1695}
1696
1697/// Check all MachineMemOperands for a hint to suppress pairing.
1698bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1699  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1700    return MMO->getFlags() & MOSuppressPair;
1701  });
1702}
1703
1704/// Set a flag on the first MachineMemOperand to suppress pairing.
1705void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1706  if (MI.memoperands_empty())
1707    return;
1708  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1709}
1710
1711/// Check all MachineMemOperands for a hint that the load/store is strided.
1712bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1713  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1714    return MMO->getFlags() & MOStridedAccess;
1715  });
1716}
1717
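// Return true if this opcode is an unscaled (LDUR/STUR-style) load or store,
// i.e. one taking a signed 9-bit byte offset.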
1718bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1719  switch (Opc) {
1720  default:
1721    return false;
1722  case AArch64::STURSi:
1723  case AArch64::STURDi:
1724  case AArch64::STURQi:
1725  case AArch64::STURBBi:
1726  case AArch64::STURHHi:
1727  case AArch64::STURWi:
1728  case AArch64::STURXi:
1729  case AArch64::LDURSi:
1730  case AArch64::LDURDi:
1731  case AArch64::LDURQi:
1732  case AArch64::LDURWi:
1733  case AArch64::LDURXi:
1734  case AArch64::LDURSWi:
1735  case AArch64::LDURHHi:
1736  case AArch64::LDURBBi:
1737  case AArch64::LDURSBWi:
1738  case AArch64::LDURSHWi:
1739    return true;
1740  }
1741}
1742
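// Map a scaled load/store opcode to its unscaled LDUR/STUR-style equivalent,
// if one exists.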
1743Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1744  switch (Opc) {
1745  default: return {};
1746  case AArch64::PRFMui: return AArch64::PRFUMi;
1747  case AArch64::LDRXui: return AArch64::LDURXi;
1748  case AArch64::LDRWui: return AArch64::LDURWi;
1749  case AArch64::LDRBui: return AArch64::LDURBi;
1750  case AArch64::LDRHui: return AArch64::LDURHi;
1751  case AArch64::LDRSui: return AArch64::LDURSi;
1752  case AArch64::LDRDui: return AArch64::LDURDi;
1753  case AArch64::LDRQui: return AArch64::LDURQi;
1754  case AArch64::LDRBBui: return AArch64::LDURBBi;
1755  case AArch64::LDRHHui: return AArch64::LDURHHi;
1756  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1757  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1758  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1759  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1760  case AArch64::LDRSWui: return AArch64::LDURSWi;
1761  case AArch64::STRXui: return AArch64::STURXi;
1762  case AArch64::STRWui: return AArch64::STURWi;
1763  case AArch64::STRBui: return AArch64::STURBi;
1764  case AArch64::STRHui: return AArch64::STURHi;
1765  case AArch64::STRSui: return AArch64::STURSi;
1766  case AArch64::STRDui: return AArch64::STURDi;
1767  case AArch64::STRQui: return AArch64::STURQi;
1768  case AArch64::STRBBui: return AArch64::STURBBi;
1769  case AArch64::STRHHui: return AArch64::STURHHi;
1770  }
1771}
1772
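// Return the operand index of the immediate offset for a load/store opcode:
// paired instructions, LDG and STGP carry it at index 3; everything else
// handled here carries it at index 2.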
1773unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1774  switch (Opc) {
1775  default:
1776    return 2;
1777  case AArch64::LDPXi:
1778  case AArch64::LDPDi:
1779  case AArch64::STPXi:
1780  case AArch64::STPDi:
1781  case AArch64::LDNPXi:
1782  case AArch64::LDNPDi:
1783  case AArch64::STNPXi:
1784  case AArch64::STNPDi:
1785  case AArch64::LDPQi:
1786  case AArch64::STPQi:
1787  case AArch64::LDNPQi:
1788  case AArch64::STNPQi:
1789  case AArch64::LDPWi:
1790  case AArch64::LDPSi:
1791  case AArch64::STPWi:
1792  case AArch64::STPSi:
1793  case AArch64::LDNPWi:
1794  case AArch64::LDNPSi:
1795  case AArch64::STNPWi:
1796  case AArch64::STNPSi:
1797  case AArch64::LDG:
1798  case AArch64::STGPi:
1799    return 3;
1800  case AArch64::ADDG:
1801  case AArch64::STGOffset:
1802    return 2;
1803  }
1804}
1805
1806bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1807  switch (MI.getOpcode()) {
1808  default:
1809    return false;
1810  // Scaled instructions.
1811  case AArch64::STRSui:
1812  case AArch64::STRDui:
1813  case AArch64::STRQui:
1814  case AArch64::STRXui:
1815  case AArch64::STRWui:
1816  case AArch64::LDRSui:
1817  case AArch64::LDRDui:
1818  case AArch64::LDRQui:
1819  case AArch64::LDRXui:
1820  case AArch64::LDRWui:
1821  case AArch64::LDRSWui:
1822  // Unscaled instructions.
1823  case AArch64::STURSi:
1824  case AArch64::STURDi:
1825  case AArch64::STURQi:
1826  case AArch64::STURWi:
1827  case AArch64::STURXi:
1828  case AArch64::LDURSi:
1829  case AArch64::LDURDi:
1830  case AArch64::LDURQi:
1831  case AArch64::LDURWi:
1832  case AArch64::LDURXi:
1833  case AArch64::LDURSWi:
1834    return true;
1835  }
1836}
1837
1838unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1839                                                   bool &Is64Bit) {
1840  switch (Opc) {
1841  default:
1842    llvm_unreachable("Opcode has no flag setting equivalent!");
1843  // 32-bit cases:
1844  case AArch64::ADDWri:
1845    Is64Bit = false;
1846    return AArch64::ADDSWri;
1847  case AArch64::ADDWrr:
1848    Is64Bit = false;
1849    return AArch64::ADDSWrr;
1850  case AArch64::ADDWrs:
1851    Is64Bit = false;
1852    return AArch64::ADDSWrs;
1853  case AArch64::ADDWrx:
1854    Is64Bit = false;
1855    return AArch64::ADDSWrx;
1856  case AArch64::ANDWri:
1857    Is64Bit = false;
1858    return AArch64::ANDSWri;
1859  case AArch64::ANDWrr:
1860    Is64Bit = false;
1861    return AArch64::ANDSWrr;
1862  case AArch64::ANDWrs:
1863    Is64Bit = false;
1864    return AArch64::ANDSWrs;
1865  case AArch64::BICWrr:
1866    Is64Bit = false;
1867    return AArch64::BICSWrr;
1868  case AArch64::BICWrs:
1869    Is64Bit = false;
1870    return AArch64::BICSWrs;
1871  case AArch64::SUBWri:
1872    Is64Bit = false;
1873    return AArch64::SUBSWri;
1874  case AArch64::SUBWrr:
1875    Is64Bit = false;
1876    return AArch64::SUBSWrr;
1877  case AArch64::SUBWrs:
1878    Is64Bit = false;
1879    return AArch64::SUBSWrs;
1880  case AArch64::SUBWrx:
1881    Is64Bit = false;
1882    return AArch64::SUBSWrx;
1883  // 64-bit cases:
1884  case AArch64::ADDXri:
1885    Is64Bit = true;
1886    return AArch64::ADDSXri;
1887  case AArch64::ADDXrr:
1888    Is64Bit = true;
1889    return AArch64::ADDSXrr;
1890  case AArch64::ADDXrs:
1891    Is64Bit = true;
1892    return AArch64::ADDSXrs;
1893  case AArch64::ADDXrx:
1894    Is64Bit = true;
1895    return AArch64::ADDSXrx;
1896  case AArch64::ANDXri:
1897    Is64Bit = true;
1898    return AArch64::ANDSXri;
1899  case AArch64::ANDXrr:
1900    Is64Bit = true;
1901    return AArch64::ANDSXrr;
1902  case AArch64::ANDXrs:
1903    Is64Bit = true;
1904    return AArch64::ANDSXrs;
1905  case AArch64::BICXrr:
1906    Is64Bit = true;
1907    return AArch64::BICSXrr;
1908  case AArch64::BICXrs:
1909    Is64Bit = true;
1910    return AArch64::BICSXrs;
1911  case AArch64::SUBXri:
1912    Is64Bit = true;
1913    return AArch64::SUBSXri;
1914  case AArch64::SUBXrr:
1915    Is64Bit = true;
1916    return AArch64::SUBSXrr;
1917  case AArch64::SUBXrs:
1918    Is64Bit = true;
1919    return AArch64::SUBSXrs;
1920  case AArch64::SUBXrx:
1921    Is64Bit = true;
1922    return AArch64::SUBSXrx;
1923  }
1924}
1925
1926// Is this a candidate for ld/st merging or pairing?  For example, we don't
1927// touch volatiles or load/stores that have a hint to avoid pair formation.
1928bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1929  // If this is a volatile load/store, don't mess with it.
1930  if (MI.hasOrderedMemoryRef())
1931    return false;
1932
1933  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1934  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1935         "Expected a reg or frame index operand.");
1936  if (!MI.getOperand(2).isImm())
1937    return false;
1938
1939  // Can't merge/pair if the instruction modifies the base register.
1940  // e.g., ldr x0, [x0]
1941  // This case will never occur with an FI base.
1942  if (MI.getOperand(1).isReg()) {
1943    Register BaseReg = MI.getOperand(1).getReg();
1944    const TargetRegisterInfo *TRI = &getRegisterInfo();
1945    if (MI.modifiesRegister(BaseReg, TRI))
1946      return false;
1947  }
1948
1949  // Check if this load/store has a hint to avoid pair formation.
1950  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1951  if (isLdStPairSuppressed(MI))
1952    return false;
1953
1954  // Do not pair any callee-save store/reload instructions in the
1955  // prologue/epilogue if the CFI information encoded the operations as separate
1956  // instructions, as that will cause the size of the actual prologue to differ
1957  // from the prologue size recorded in the Windows CFI.
1958  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
1959  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
1960                     MI.getMF()->getFunction().needsUnwindTableEntry();
1961  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
1962                      MI.getFlag(MachineInstr::FrameDestroy)))
1963    return false;
1964
1965  // On some CPUs quad load/store pairs are slower than two single load/stores.
1966  if (Subtarget.isPaired128Slow()) {
1967    switch (MI.getOpcode()) {
1968    default:
1969      break;
1970    case AArch64::LDURQi:
1971    case AArch64::STURQi:
1972    case AArch64::LDRQui:
1973    case AArch64::STRQui:
1974      return false;
1975    }
1976  }
1977
1978  return true;
1979}
1980
1981bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1982                                          const MachineOperand *&BaseOp,
1983                                          int64_t &Offset,
1984                                          const TargetRegisterInfo *TRI) const {
1985  if (!LdSt.mayLoadOrStore())
1986    return false;
1987
1988  unsigned Width;
1989  return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1990}
1991
1992bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1993    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
1994    unsigned &Width, const TargetRegisterInfo *TRI) const {
1995  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1996  // Handle only loads/stores with base register followed by immediate offset.
1997  if (LdSt.getNumExplicitOperands() == 3) {
1998    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1999    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2000        !LdSt.getOperand(2).isImm())
2001      return false;
2002  } else if (LdSt.getNumExplicitOperands() == 4) {
2003    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2004    if (!LdSt.getOperand(1).isReg() ||
2005        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2006        !LdSt.getOperand(3).isImm())
2007      return false;
2008  } else
2009    return false;
2010
2011  // Get the scaling factor for the instruction and set the width for the
2012  // instruction.
2013  unsigned Scale = 0;
2014  int64_t Dummy1, Dummy2;
2015
2016  // If this returns false, then it's an instruction we don't want to handle.
2017  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2018    return false;
2019
2020  // Compute the offset. Offset is calculated as the immediate operand
2021  // multiplied by the scaling factor. Unscaled instructions have scaling factor
2022  // set to 1.
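  // For example, "ldr x1, [x0, #16]" (LDRXui) carries an immediate of 2 and a
  // scale of 8, giving a byte offset of 16.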
2023  if (LdSt.getNumExplicitOperands() == 3) {
2024    BaseOp = &LdSt.getOperand(1);
2025    Offset = LdSt.getOperand(2).getImm() * Scale;
2026  } else {
2027    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2028    BaseOp = &LdSt.getOperand(2);
2029    Offset = LdSt.getOperand(3).getImm() * Scale;
2030  }
2031
2032  if (!BaseOp->isReg() && !BaseOp->isFI())
2033    return false;
2034
2035  return true;
2036}
2037
2038MachineOperand &
2039AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2040  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2041  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2042  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2043  return OfsOp;
2044}
2045
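// Describe a load/store opcode: Scale is the number of bytes each unit of the
// immediate represents, Width the number of bytes accessed, and [MinOffset,
// MaxOffset] the range of encodable immediates (in Scale-sized units).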
2046bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2047                                    unsigned &Width, int64_t &MinOffset,
2048                                    int64_t &MaxOffset) {
2049  switch (Opcode) {
2050  // Not a memory operation, or not something we want to handle.
2051  default:
2052    Scale = Width = 0;
2053    MinOffset = MaxOffset = 0;
2054    return false;
2055  case AArch64::STRWpost:
2056  case AArch64::LDRWpost:
2057    Width = 32;
2058    Scale = 4;
2059    MinOffset = -256;
2060    MaxOffset = 255;
2061    break;
2062  case AArch64::LDURQi:
2063  case AArch64::STURQi:
2064    Width = 16;
2065    Scale = 1;
2066    MinOffset = -256;
2067    MaxOffset = 255;
2068    break;
2069  case AArch64::PRFUMi:
2070  case AArch64::LDURXi:
2071  case AArch64::LDURDi:
2072  case AArch64::STURXi:
2073  case AArch64::STURDi:
2074    Width = 8;
2075    Scale = 1;
2076    MinOffset = -256;
2077    MaxOffset = 255;
2078    break;
2079  case AArch64::LDURWi:
2080  case AArch64::LDURSi:
2081  case AArch64::LDURSWi:
2082  case AArch64::STURWi:
2083  case AArch64::STURSi:
2084    Width = 4;
2085    Scale = 1;
2086    MinOffset = -256;
2087    MaxOffset = 255;
2088    break;
2089  case AArch64::LDURHi:
2090  case AArch64::LDURHHi:
2091  case AArch64::LDURSHXi:
2092  case AArch64::LDURSHWi:
2093  case AArch64::STURHi:
2094  case AArch64::STURHHi:
2095    Width = 2;
2096    Scale = 1;
2097    MinOffset = -256;
2098    MaxOffset = 255;
2099    break;
2100  case AArch64::LDURBi:
2101  case AArch64::LDURBBi:
2102  case AArch64::LDURSBXi:
2103  case AArch64::LDURSBWi:
2104  case AArch64::STURBi:
2105  case AArch64::STURBBi:
2106    Width = 1;
2107    Scale = 1;
2108    MinOffset = -256;
2109    MaxOffset = 255;
2110    break;
2111  case AArch64::LDPQi:
2112  case AArch64::LDNPQi:
2113  case AArch64::STPQi:
2114  case AArch64::STNPQi:
2115    Scale = 16;
2116    Width = 32;
2117    MinOffset = -64;
2118    MaxOffset = 63;
2119    break;
2120  case AArch64::LDRQui:
2121  case AArch64::STRQui:
2122    Scale = Width = 16;
2123    MinOffset = 0;
2124    MaxOffset = 4095;
2125    break;
2126  case AArch64::LDPXi:
2127  case AArch64::LDPDi:
2128  case AArch64::LDNPXi:
2129  case AArch64::LDNPDi:
2130  case AArch64::STPXi:
2131  case AArch64::STPDi:
2132  case AArch64::STNPXi:
2133  case AArch64::STNPDi:
2134    Scale = 8;
2135    Width = 16;
2136    MinOffset = -64;
2137    MaxOffset = 63;
2138    break;
2139  case AArch64::PRFMui:
2140  case AArch64::LDRXui:
2141  case AArch64::LDRDui:
2142  case AArch64::STRXui:
2143  case AArch64::STRDui:
2144    Scale = Width = 8;
2145    MinOffset = 0;
2146    MaxOffset = 4095;
2147    break;
2148  case AArch64::LDPWi:
2149  case AArch64::LDPSi:
2150  case AArch64::LDNPWi:
2151  case AArch64::LDNPSi:
2152  case AArch64::STPWi:
2153  case AArch64::STPSi:
2154  case AArch64::STNPWi:
2155  case AArch64::STNPSi:
2156    Scale = 4;
2157    Width = 8;
2158    MinOffset = -64;
2159    MaxOffset = 63;
2160    break;
2161  case AArch64::LDRWui:
2162  case AArch64::LDRSui:
2163  case AArch64::LDRSWui:
2164  case AArch64::STRWui:
2165  case AArch64::STRSui:
2166    Scale = Width = 4;
2167    MinOffset = 0;
2168    MaxOffset = 4095;
2169    break;
2170  case AArch64::LDRHui:
2171  case AArch64::LDRHHui:
2172  case AArch64::LDRSHWui:
2173  case AArch64::LDRSHXui:
2174  case AArch64::STRHui:
2175  case AArch64::STRHHui:
2176    Scale = Width = 2;
2177    MinOffset = 0;
2178    MaxOffset = 4095;
2179    break;
2180  case AArch64::LDRBui:
2181  case AArch64::LDRBBui:
2182  case AArch64::LDRSBWui:
2183  case AArch64::LDRSBXui:
2184  case AArch64::STRBui:
2185  case AArch64::STRBBui:
2186    Scale = Width = 1;
2187    MinOffset = 0;
2188    MaxOffset = 4095;
2189    break;
2190  case AArch64::ADDG:
2191    Scale = 16;
2192    Width = 0;
2193    MinOffset = 0;
2194    MaxOffset = 63;
2195    break;
2196  case AArch64::TAGPstack:
2197    Scale = 16;
2198    Width = 0;
2199    // TAGP with a negative offset turns into SUBP, which has a maximum offset
2200    // of 63 (not 64!).
2201    MinOffset = -63;
2202    MaxOffset = 63;
2203    break;
2204  case AArch64::LDG:
2205  case AArch64::STGOffset:
2206  case AArch64::STZGOffset:
2207    Scale = Width = 16;
2208    MinOffset = -256;
2209    MaxOffset = 255;
2210    break;
2211  case AArch64::LDR_PXI:
2212  case AArch64::STR_PXI:
2213    Scale = Width = 2;
2214    MinOffset = -256;
2215    MaxOffset = 255;
2216    break;
2217  case AArch64::LDR_ZXI:
2218  case AArch64::STR_ZXI:
2219    Scale = Width = 16;
2220    MinOffset = -256;
2221    MaxOffset = 255;
2222    break;
2223  case AArch64::ST2GOffset:
2224  case AArch64::STZ2GOffset:
2225    Scale = 16;
2226    Width = 32;
2227    MinOffset = -256;
2228    MaxOffset = 255;
2229    break;
2230  case AArch64::STGPi:
2231    Scale = Width = 16;
2232    MinOffset = -64;
2233    MaxOffset = 63;
2234    break;
2235  }
2236
2237  return true;
2238}
2239
2240// Element size in bytes for a scaled or unscaled load or store opcode.
2241int AArch64InstrInfo::getMemScale(unsigned Opc) {
2242  switch (Opc) {
2243  default:
2244    llvm_unreachable("Opcode has unknown scale!");
2245  case AArch64::LDRBBui:
2246  case AArch64::LDURBBi:
2247  case AArch64::LDRSBWui:
2248  case AArch64::LDURSBWi:
2249  case AArch64::STRBBui:
2250  case AArch64::STURBBi:
2251    return 1;
2252  case AArch64::LDRHHui:
2253  case AArch64::LDURHHi:
2254  case AArch64::LDRSHWui:
2255  case AArch64::LDURSHWi:
2256  case AArch64::STRHHui:
2257  case AArch64::STURHHi:
2258    return 2;
2259  case AArch64::LDRSui:
2260  case AArch64::LDURSi:
2261  case AArch64::LDRSWui:
2262  case AArch64::LDURSWi:
2263  case AArch64::LDRWui:
2264  case AArch64::LDURWi:
2265  case AArch64::STRSui:
2266  case AArch64::STURSi:
2267  case AArch64::STRWui:
2268  case AArch64::STURWi:
2269  case AArch64::LDPSi:
2270  case AArch64::LDPSWi:
2271  case AArch64::LDPWi:
2272  case AArch64::STPSi:
2273  case AArch64::STPWi:
2274    return 4;
2275  case AArch64::LDRDui:
2276  case AArch64::LDURDi:
2277  case AArch64::LDRXui:
2278  case AArch64::LDURXi:
2279  case AArch64::STRDui:
2280  case AArch64::STURDi:
2281  case AArch64::STRXui:
2282  case AArch64::STURXi:
2283  case AArch64::LDPDi:
2284  case AArch64::LDPXi:
2285  case AArch64::STPDi:
2286  case AArch64::STPXi:
2287    return 8;
2288  case AArch64::LDRQui:
2289  case AArch64::LDURQi:
2290  case AArch64::STRQui:
2291  case AArch64::STURQi:
2292  case AArch64::LDPQi:
2293  case AArch64::STPQi:
2294  case AArch64::STGOffset:
2295  case AArch64::STZGOffset:
2296  case AArch64::ST2GOffset:
2297  case AArch64::STZ2GOffset:
2298  case AArch64::STGPi:
2299    return 16;
2300  }
2301}
2302
2303// Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2304// scaled.
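// For example, an LDURXi (scale 8) with byte offset 24 becomes element offset
// 3, while a byte offset of 12 cannot be scaled and is rejected.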
2305static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2306  int Scale = AArch64InstrInfo::getMemScale(Opc);
2307
2308  // If the byte-offset isn't a multiple of the stride, we can't scale this
2309  // offset.
2310  if (Offset % Scale != 0)
2311    return false;
2312
2313  // Convert the byte-offset used by unscaled instructions into an "element"
2314  // offset used by the scaled pair load/store instructions.
2315  Offset /= Scale;
2316  return true;
2317}
2318
2319static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2320  if (FirstOpc == SecondOpc)
2321    return true;
2322  // We can also pair sign-ext and zero-ext instructions.
2323  switch (FirstOpc) {
2324  default:
2325    return false;
2326  case AArch64::LDRWui:
2327  case AArch64::LDURWi:
2328    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2329  case AArch64::LDRSWui:
2330  case AArch64::LDURSWi:
2331    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2332  }
2333  // These instructions can't be paired based on their opcodes.
2334  return false;
2335}
2336
2337static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2338                            int64_t Offset1, unsigned Opcode1, int FI2,
2339                            int64_t Offset2, unsigned Opcode2) {
2340  // Accesses through fixed stack object frame indices may refer to a different
2341  // fixed stack slot. Check that the object offsets plus instruction offsets match.
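  // For example, two 8-byte fixed objects at object offsets 0 and 8, each
  // accessed by an LDRXui with immediate 0, scale to offsets 0 and 1 and may
  // be clustered; the same accesses on objects at offsets 0 and 16 may not.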
2342  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2343    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2344    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2345    assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2346    // Convert to scaled object offsets.
2347    int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
2348    if (ObjectOffset1 % Scale1 != 0)
2349      return false;
2350    ObjectOffset1 /= Scale1;
2351    int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
2352    if (ObjectOffset2 % Scale2 != 0)
2353      return false;
2354    ObjectOffset2 /= Scale2;
2355    ObjectOffset1 += Offset1;
2356    ObjectOffset2 += Offset2;
2357    return ObjectOffset1 + 1 == ObjectOffset2;
2358  }
2359
2360  return FI1 == FI2;
2361}
2362
2363/// Detect opportunities for ldp/stp formation.
2364///
2365/// Only called for LdSt for which getMemOperandWithOffset returns true.
2366bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
2367                                           const MachineOperand &BaseOp2,
2368                                           unsigned NumLoads) const {
2369  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2370  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2371  if (BaseOp1.getType() != BaseOp2.getType())
2372    return false;
2373
2374  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2375         "Only base registers and frame indices are supported.");
2376
2377  // Check for both base regs and base FI.
2378  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2379    return false;
2380
2381  // Only cluster up to a single pair.
2382  if (NumLoads > 1)
2383    return false;
2384
2385  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2386    return false;
2387
2388  // Can we pair these instructions based on their opcodes?
2389  unsigned FirstOpc = FirstLdSt.getOpcode();
2390  unsigned SecondOpc = SecondLdSt.getOpcode();
2391  if (!canPairLdStOpc(FirstOpc, SecondOpc))
2392    return false;
2393
2394  // Can't merge volatiles or load/stores that have a hint to avoid pair
2395  // formation, for example.
2396  if (!isCandidateToMergeOrPair(FirstLdSt) ||
2397      !isCandidateToMergeOrPair(SecondLdSt))
2398    return false;
2399
2400  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2401  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2402  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2403    return false;
2404
2405  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2406  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2407    return false;
2408
2409  // Pairwise instructions have a 7-bit signed offset field.
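  // (Offset1 is in element units here; for LDPXi/STPXi the [-64, 63] range
  // corresponds to byte offsets from -512 to 504.)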
2410  if (Offset1 > 63 || Offset1 < -64)
2411    return false;
2412
2413  // The caller should already have ordered First/SecondLdSt by offset.
2414  // Note: this does not hold for non-equal frame index bases.
2415  if (BaseOp1.isFI()) {
2416    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2417           "Caller should have ordered offsets.");
2418
2419    const MachineFrameInfo &MFI =
2420        FirstLdSt.getParent()->getParent()->getFrameInfo();
2421    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2422                           BaseOp2.getIndex(), Offset2, SecondOpc);
2423  }
2424
2425  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2426
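  // The offsets are in element units, so adjacent accesses differ by exactly
  // one.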
2427  return Offset1 + 1 == Offset2;
2428}
2429
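// Add Reg, restricted to sub-register SubIdx if nonzero, to MIB: physical
// registers are narrowed to the concrete sub-register, while virtual
// registers keep the sub-register index on the operand.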
2430static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2431                                            unsigned Reg, unsigned SubIdx,
2432                                            unsigned State,
2433                                            const TargetRegisterInfo *TRI) {
2434  if (!SubIdx)
2435    return MIB.addReg(Reg, State);
2436
2437  if (Register::isPhysicalRegister(Reg))
2438    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2439  return MIB.addReg(Reg, State, SubIdx);
2440}
2441
2442static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2443                                        unsigned NumRegs) {
2444  // We really want the positive remainder mod 32 here, which happens to be
2445  // easily obtainable with a mask.
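  // For example, copying Q1_Q2_Q3 into Q2_Q3_Q4 (DestReg encoding 2, SrcReg
  // encoding 1, NumRegs 3) yields ((2 - 1) & 0x1f) == 1 < 3: a forward copy
  // would overwrite Q2 and Q3 before they are read, so the caller must copy
  // the sub-registers in reverse order.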
2446  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2447}
2448
2449void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2450                                        MachineBasicBlock::iterator I,
2451                                        const DebugLoc &DL, MCRegister DestReg,
2452                                        MCRegister SrcReg, bool KillSrc,
2453                                        unsigned Opcode,
2454                                        ArrayRef<unsigned> Indices) const {
2455  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2456  const TargetRegisterInfo *TRI = &getRegisterInfo();
2457  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2458  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2459  unsigned NumRegs = Indices.size();
2460
2461  int SubReg = 0, End = NumRegs, Incr = 1;
2462  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2463    SubReg = NumRegs - 1;
2464    End = -1;
2465    Incr = -1;
2466  }
2467
2468  for (; SubReg != End; SubReg += Incr) {
2469    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2470    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2471    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2472    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2473  }
2474}
2475
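// Copy a sequential GPR register tuple, sub-register by sub-register, using
// "orr Dst, ZeroReg, Src" moves.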
2476void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2477                                       MachineBasicBlock::iterator I,
2478                                       DebugLoc DL, unsigned DestReg,
2479                                       unsigned SrcReg, bool KillSrc,
2480                                       unsigned Opcode, unsigned ZeroReg,
2481                                       llvm::ArrayRef<unsigned> Indices) const {
2482  const TargetRegisterInfo *TRI = &getRegisterInfo();
2483  unsigned NumRegs = Indices.size();
2484
2485#ifndef NDEBUG
2486  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2487  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2488  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2489         "GPR reg sequences should not be able to overlap");
2490#endif
2491
2492  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2493    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2494    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2495    MIB.addReg(ZeroReg);
2496    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2497    MIB.addImm(0);
2498  }
2499}
2500
2501void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2502                                   MachineBasicBlock::iterator I,
2503                                   const DebugLoc &DL, MCRegister DestReg,
2504                                   MCRegister SrcReg, bool KillSrc) const {
2505  if (AArch64::GPR32spRegClass.contains(DestReg) &&
2506      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2507    const TargetRegisterInfo *TRI = &getRegisterInfo();
2508
2509    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2510      // If either operand is WSP, expand to ADD #0.
2511      if (Subtarget.hasZeroCycleRegMove()) {
2512        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2513        MCRegister DestRegX = TRI->getMatchingSuperReg(
2514            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2515        MCRegister SrcRegX = TRI->getMatchingSuperReg(
2516            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2517        // This instruction is reading and writing X registers.  This may upset
2518        // the register scavenger and machine verifier, so we need to indicate
2519        // that we are reading an undefined value from SrcRegX, but a proper
2520        // value from SrcReg.
2521        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2522            .addReg(SrcRegX, RegState::Undef)
2523            .addImm(0)
2524            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2525            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2526      } else {
2527        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2528            .addReg(SrcReg, getKillRegState(KillSrc))
2529            .addImm(0)
2530            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2531      }
2532    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2533      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2534          .addImm(0)
2535          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2536    } else {
2537      if (Subtarget.hasZeroCycleRegMove()) {
2538        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2539        MCRegister DestRegX = TRI->getMatchingSuperReg(
2540            DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2541        MCRegister SrcRegX = TRI->getMatchingSuperReg(
2542            SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2543        // This instruction is reading and writing X registers.  This may upset
2544        // the register scavenger and machine verifier, so we need to indicate
2545        // that we are reading an undefined value from SrcRegX, but a proper
2546        // value from SrcReg.
2547        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2548            .addReg(AArch64::XZR)
2549            .addReg(SrcRegX, RegState::Undef)
2550            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2551      } else {
2552        // Otherwise, expand to ORR WZR.
2553        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2554            .addReg(AArch64::WZR)
2555            .addReg(SrcReg, getKillRegState(KillSrc));
2556      }
2557    }
2558    return;
2559  }
2560
2561  // Copy a Predicate register by ORRing with itself.
2562  if (AArch64::PPRRegClass.contains(DestReg) &&
2563      AArch64::PPRRegClass.contains(SrcReg)) {
2564    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2565    BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
2566      .addReg(SrcReg) // Pg
2567      .addReg(SrcReg)
2568      .addReg(SrcReg, getKillRegState(KillSrc));
2569    return;
2570  }
2571
2572  // Copy a Z register by ORRing with itself.
2573  if (AArch64::ZPRRegClass.contains(DestReg) &&
2574      AArch64::ZPRRegClass.contains(SrcReg)) {
2575    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2576    BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
2577      .addReg(SrcReg)
2578      .addReg(SrcReg, getKillRegState(KillSrc));
2579    return;
2580  }
2581
2582  if (AArch64::GPR64spRegClass.contains(DestReg) &&
2583      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2584    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2585      // If either operand is SP, expand to ADD #0.
2586      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2587          .addReg(SrcReg, getKillRegState(KillSrc))
2588          .addImm(0)
2589          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2590    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2591      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2592          .addImm(0)
2593          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2594    } else {
2595      // Otherwise, expand to ORR XZR.
2596      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2597          .addReg(AArch64::XZR)
2598          .addReg(SrcReg, getKillRegState(KillSrc));
2599    }
2600    return;
2601  }
2602
2603  // Copy a DDDD register quad by copying the individual sub-registers.
2604  if (AArch64::DDDDRegClass.contains(DestReg) &&
2605      AArch64::DDDDRegClass.contains(SrcReg)) {
2606    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2607                                       AArch64::dsub2, AArch64::dsub3};
2608    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2609                     Indices);
2610    return;
2611  }
2612
2613  // Copy a DDD register triple by copying the individual sub-registers.
2614  if (AArch64::DDDRegClass.contains(DestReg) &&
2615      AArch64::DDDRegClass.contains(SrcReg)) {
2616    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2617                                       AArch64::dsub2};
2618    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2619                     Indices);
2620    return;
2621  }
2622
2623  // Copy a DD register pair by copying the individual sub-registers.
2624  if (AArch64::DDRegClass.contains(DestReg) &&
2625      AArch64::DDRegClass.contains(SrcReg)) {
2626    static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2627    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2628                     Indices);
2629    return;
2630  }
2631
2632  // Copy a QQQQ register quad by copying the individual sub-registers.
2633  if (AArch64::QQQQRegClass.contains(DestReg) &&
2634      AArch64::QQQQRegClass.contains(SrcReg)) {
2635    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2636                                       AArch64::qsub2, AArch64::qsub3};
2637    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2638                     Indices);
2639    return;
2640  }
2641
2642  // Copy a QQQ register triple by copying the individual sub-registers.
2643  if (AArch64::QQQRegClass.contains(DestReg) &&
2644      AArch64::QQQRegClass.contains(SrcReg)) {
2645    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2646                                       AArch64::qsub2};
2647    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2648                     Indices);
2649    return;
2650  }
2651
2652  // Copy a QQ register pair by copying the individual sub-registers.
2653  if (AArch64::QQRegClass.contains(DestReg) &&
2654      AArch64::QQRegClass.contains(SrcReg)) {
2655    static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2656    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2657                     Indices);
2658    return;
2659  }
2660
2661  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2662      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2663    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2664    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2665                    AArch64::XZR, Indices);
2666    return;
2667  }
2668
2669  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2670      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2671    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2672    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2673                    AArch64::WZR, Indices);
2674    return;
2675  }
2676
2677  if (AArch64::FPR128RegClass.contains(DestReg) &&
2678      AArch64::FPR128RegClass.contains(SrcReg)) {
2679    if (Subtarget.hasNEON()) {
2680      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2681          .addReg(SrcReg)
2682          .addReg(SrcReg, getKillRegState(KillSrc));
2683    } else {
2684      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2685          .addReg(AArch64::SP, RegState::Define)
2686          .addReg(SrcReg, getKillRegState(KillSrc))
2687          .addReg(AArch64::SP)
2688          .addImm(-16);
2689      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2690          .addReg(AArch64::SP, RegState::Define)
2691          .addReg(DestReg, RegState::Define)
2692          .addReg(AArch64::SP)
2693          .addImm(16);
2694    }
2695    return;
2696  }
2697
2698  if (AArch64::FPR64RegClass.contains(DestReg) &&
2699      AArch64::FPR64RegClass.contains(SrcReg)) {
2700    if (Subtarget.hasNEON()) {
2701      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2702                                       &AArch64::FPR128RegClass);
2703      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2704                                      &AArch64::FPR128RegClass);
2705      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2706          .addReg(SrcReg)
2707          .addReg(SrcReg, getKillRegState(KillSrc));
2708    } else {
2709      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2710          .addReg(SrcReg, getKillRegState(KillSrc));
2711    }
2712    return;
2713  }
2714
2715  if (AArch64::FPR32RegClass.contains(DestReg) &&
2716      AArch64::FPR32RegClass.contains(SrcReg)) {
2717    if (Subtarget.hasNEON()) {
2718      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2719                                       &AArch64::FPR128RegClass);
2720      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2721                                      &AArch64::FPR128RegClass);
2722      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2723          .addReg(SrcReg)
2724          .addReg(SrcReg, getKillRegState(KillSrc));
2725    } else {
2726      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2727          .addReg(SrcReg, getKillRegState(KillSrc));
2728    }
2729    return;
2730  }
2731
2732  if (AArch64::FPR16RegClass.contains(DestReg) &&
2733      AArch64::FPR16RegClass.contains(SrcReg)) {
2734    if (Subtarget.hasNEON()) {
2735      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2736                                       &AArch64::FPR128RegClass);
2737      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2738                                      &AArch64::FPR128RegClass);
2739      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2740          .addReg(SrcReg)
2741          .addReg(SrcReg, getKillRegState(KillSrc));
2742    } else {
2743      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2744                                       &AArch64::FPR32RegClass);
2745      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2746                                      &AArch64::FPR32RegClass);
2747      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2748          .addReg(SrcReg, getKillRegState(KillSrc));
2749    }
2750    return;
2751  }
2752
2753  if (AArch64::FPR8RegClass.contains(DestReg) &&
2754      AArch64::FPR8RegClass.contains(SrcReg)) {
2755    if (Subtarget.hasNEON()) {
2756      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2757                                       &AArch64::FPR128RegClass);
2758      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2759                                      &AArch64::FPR128RegClass);
2760      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2761          .addReg(SrcReg)
2762          .addReg(SrcReg, getKillRegState(KillSrc));
2763    } else {
2764      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2765                                       &AArch64::FPR32RegClass);
2766      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2767                                      &AArch64::FPR32RegClass);
2768      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2769          .addReg(SrcReg, getKillRegState(KillSrc));
2770    }
2771    return;
2772  }
2773
2774  // Copies between GPR64 and FPR64.
2775  if (AArch64::FPR64RegClass.contains(DestReg) &&
2776      AArch64::GPR64RegClass.contains(SrcReg)) {
2777    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2778        .addReg(SrcReg, getKillRegState(KillSrc));
2779    return;
2780  }
2781  if (AArch64::GPR64RegClass.contains(DestReg) &&
2782      AArch64::FPR64RegClass.contains(SrcReg)) {
2783    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2784        .addReg(SrcReg, getKillRegState(KillSrc));
2785    return;
2786  }
2787  // Copies between GPR32 and FPR32.
2788  if (AArch64::FPR32RegClass.contains(DestReg) &&
2789      AArch64::GPR32RegClass.contains(SrcReg)) {
2790    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2791        .addReg(SrcReg, getKillRegState(KillSrc));
2792    return;
2793  }
2794  if (AArch64::GPR32RegClass.contains(DestReg) &&
2795      AArch64::FPR32RegClass.contains(SrcReg)) {
2796    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2797        .addReg(SrcReg, getKillRegState(KillSrc));
2798    return;
2799  }
2800
2801  if (DestReg == AArch64::NZCV) {
2802    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2803    BuildMI(MBB, I, DL, get(AArch64::MSR))
2804        .addImm(AArch64SysReg::NZCV)
2805        .addReg(SrcReg, getKillRegState(KillSrc))
2806        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2807    return;
2808  }
2809
2810  if (SrcReg == AArch64::NZCV) {
2811    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2812    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2813        .addImm(AArch64SysReg::NZCV)
2814        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2815    return;
2816  }
2817
2818  llvm_unreachable("unimplemented reg-to-reg copy");
2819}
2820
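// Spill a register pair tuple to a stack slot by storing both sub-registers
// with a single store-pair instruction.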
2821static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2822                                    MachineBasicBlock &MBB,
2823                                    MachineBasicBlock::iterator InsertBefore,
2824                                    const MCInstrDesc &MCID,
2825                                    unsigned SrcReg, bool IsKill,
2826                                    unsigned SubIdx0, unsigned SubIdx1, int FI,
2827                                    MachineMemOperand *MMO) {
2828  unsigned SrcReg0 = SrcReg;
2829  unsigned SrcReg1 = SrcReg;
2830  if (Register::isPhysicalRegister(SrcReg)) {
2831    SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2832    SubIdx0 = 0;
2833    SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2834    SubIdx1 = 0;
2835  }
2836  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2837      .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2838      .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2839      .addFrameIndex(FI)
2840      .addImm(0)
2841      .addMemOperand(MMO);
2842}
2843
2844void AArch64InstrInfo::storeRegToStackSlot(
2845    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2846    bool isKill, int FI, const TargetRegisterClass *RC,
2847    const TargetRegisterInfo *TRI) const {
2848  MachineFunction &MF = *MBB.getParent();
2849  MachineFrameInfo &MFI = MF.getFrameInfo();
2850  unsigned Align = MFI.getObjectAlignment(FI);
2851
2852  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2853  MachineMemOperand *MMO = MF.getMachineMemOperand(
2854      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2855  unsigned Opc = 0;
2856  bool Offset = true;
2857  switch (TRI->getSpillSize(*RC)) {
2858  case 1:
2859    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2860      Opc = AArch64::STRBui;
2861    break;
2862  case 2:
2863    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2864      Opc = AArch64::STRHui;
2865    break;
2866  case 4:
2867    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2868      Opc = AArch64::STRWui;
2869      if (Register::isVirtualRegister(SrcReg))
2870        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2871      else
2872        assert(SrcReg != AArch64::WSP);
2873    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2874      Opc = AArch64::STRSui;
2875    break;
2876  case 8:
2877    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2878      Opc = AArch64::STRXui;
2879      if (Register::isVirtualRegister(SrcReg))
2880        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2881      else
2882        assert(SrcReg != AArch64::SP);
2883    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2884      Opc = AArch64::STRDui;
2885    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2886      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2887                              get(AArch64::STPWi), SrcReg, isKill,
2888                              AArch64::sube32, AArch64::subo32, FI, MMO);
2889      return;
2890    }
2891    break;
2892  case 16:
2893    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2894      Opc = AArch64::STRQui;
2895    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2896      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2897      Opc = AArch64::ST1Twov1d;
2898      Offset = false;
2899    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2900      storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2901                              get(AArch64::STPXi), SrcReg, isKill,
2902                              AArch64::sube64, AArch64::subo64, FI, MMO);
2903      return;
2904    }
2905    break;
2906  case 24:
2907    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2908      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2909      Opc = AArch64::ST1Threev1d;
2910      Offset = false;
2911    }
2912    break;
2913  case 32:
2914    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2915      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2916      Opc = AArch64::ST1Fourv1d;
2917      Offset = false;
2918    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2919      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2920      Opc = AArch64::ST1Twov2d;
2921      Offset = false;
2922    }
2923    break;
2924  case 48:
2925    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2926      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2927      Opc = AArch64::ST1Threev2d;
2928      Offset = false;
2929    }
2930    break;
2931  case 64:
2932    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2933      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2934      Opc = AArch64::ST1Fourv2d;
2935      Offset = false;
2936    }
2937    break;
2938  }
2939  unsigned StackID = TargetStackID::Default;
2940  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
2941    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
2942    Opc = AArch64::STR_PXI;
2943    StackID = TargetStackID::SVEVector;
2944  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
2945    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
2946    Opc = AArch64::STR_ZXI;
2947    StackID = TargetStackID::SVEVector;
2948  }
2949  assert(Opc && "Unknown register class");
2950  MFI.setStackID(FI, StackID);
2951
2952  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2953                                     .addReg(SrcReg, getKillRegState(isKill))
2954                                     .addFrameIndex(FI);
2955
2956  if (Offset)
2957    MI.addImm(0);
2958  MI.addMemOperand(MMO);
2959}
2960
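// Reload a register pair tuple from a stack slot by loading both
// sub-registers with a single load-pair instruction.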
2961static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2962                                     MachineBasicBlock &MBB,
2963                                     MachineBasicBlock::iterator InsertBefore,
2964                                     const MCInstrDesc &MCID,
2965                                     unsigned DestReg, unsigned SubIdx0,
2966                                     unsigned SubIdx1, int FI,
2967                                     MachineMemOperand *MMO) {
2968  unsigned DestReg0 = DestReg;
2969  unsigned DestReg1 = DestReg;
2970  bool IsUndef = true;
2971  if (Register::isPhysicalRegister(DestReg)) {
2972    DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2973    SubIdx0 = 0;
2974    DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2975    SubIdx1 = 0;
2976    IsUndef = false;
2977  }
2978  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2979      .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2980      .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2981      .addFrameIndex(FI)
2982      .addImm(0)
2983      .addMemOperand(MMO);
2984}
2985
2986void AArch64InstrInfo::loadRegFromStackSlot(
2987    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2988    int FI, const TargetRegisterClass *RC,
2989    const TargetRegisterInfo *TRI) const {
2990  MachineFunction &MF = *MBB.getParent();
2991  MachineFrameInfo &MFI = MF.getFrameInfo();
2992  unsigned Align = MFI.getObjectAlignment(FI);
2993  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2994  MachineMemOperand *MMO = MF.getMachineMemOperand(
2995      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2996
2997  unsigned Opc = 0;
2998  bool Offset = true;
2999  switch (TRI->getSpillSize(*RC)) {
3000  case 1:
3001    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3002      Opc = AArch64::LDRBui;
3003    break;
3004  case 2:
3005    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3006      Opc = AArch64::LDRHui;
3007    break;
3008  case 4:
3009    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3010      Opc = AArch64::LDRWui;
3011      if (Register::isVirtualRegister(DestReg))
3012        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3013      else
3014        assert(DestReg != AArch64::WSP);
3015    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3016      Opc = AArch64::LDRSui;
3017    break;
3018  case 8:
3019    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3020      Opc = AArch64::LDRXui;
3021      if (Register::isVirtualRegister(DestReg))
3022        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3023      else
3024        assert(DestReg != AArch64::SP);
3025    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3026      Opc = AArch64::LDRDui;
3027    } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3028      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3029                               get(AArch64::LDPWi), DestReg, AArch64::sube32,
3030                               AArch64::subo32, FI, MMO);
3031      return;
3032    }
3033    break;
3034  case 16:
3035    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3036      Opc = AArch64::LDRQui;
3037    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3038      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3039      Opc = AArch64::LD1Twov1d;
3040      Offset = false;
3041    } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3042      loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3043                               get(AArch64::LDPXi), DestReg, AArch64::sube64,
3044                               AArch64::subo64, FI, MMO);
3045      return;
3046    }
3047    break;
3048  case 24:
3049    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3050      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3051      Opc = AArch64::LD1Threev1d;
3052      Offset = false;
3053    }
3054    break;
3055  case 32:
3056    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3057      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3058      Opc = AArch64::LD1Fourv1d;
3059      Offset = false;
3060    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3061      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3062      Opc = AArch64::LD1Twov2d;
3063      Offset = false;
3064    }
3065    break;
3066  case 48:
3067    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3068      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3069      Opc = AArch64::LD1Threev2d;
3070      Offset = false;
3071    }
3072    break;
3073  case 64:
3074    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3075      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3076      Opc = AArch64::LD1Fourv2d;
3077      Offset = false;
3078    }
3079    break;
3080  }
3081
3082  unsigned StackID = TargetStackID::Default;
3083  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3084    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3085    Opc = AArch64::LDR_PXI;
3086    StackID = TargetStackID::SVEVector;
3087  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3088    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3089    Opc = AArch64::LDR_ZXI;
3090    StackID = TargetStackID::SVEVector;
3091  }
3092  assert(Opc && "Unknown register class");
3093  MFI.setStackID(FI, StackID);
3094
3095  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3096                                     .addReg(DestReg, getDefRegState(true))
3097                                     .addFrameIndex(FI);
3098  if (Offset)
3099    MI.addImm(0);
3100  MI.addMemOperand(MMO);
3101}
3102
3103// Helper function to emit a frame offset adjustment from a given
3104// pointer (SrcReg), stored into DestReg. This function is explicit
3105// in that the caller must supply the opcode to use.
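// For example, with Opc == AArch64::ADDXri and Offset == 4097, the loop below
// emits "add Xd, Xn, #1, lsl #12" followed by "add Xd, Xd, #1", since each
// instruction encodes only 12 bits, optionally shifted left by 12.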
3106static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3107                               MachineBasicBlock::iterator MBBI,
3108                               const DebugLoc &DL, unsigned DestReg,
3109                               unsigned SrcReg, int64_t Offset, unsigned Opc,
3110                               const TargetInstrInfo *TII,
3111                               MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3112                               bool *HasWinCFI) {
3113  int Sign = 1;
3114  unsigned MaxEncoding, ShiftSize;
3115  switch (Opc) {
3116  case AArch64::ADDXri:
3117  case AArch64::ADDSXri:
3118  case AArch64::SUBXri:
3119  case AArch64::SUBSXri:
3120    MaxEncoding = 0xfff;
3121    ShiftSize = 12;
3122    break;
3123  case AArch64::ADDVL_XXI:
3124  case AArch64::ADDPL_XXI:
3125    MaxEncoding = 31;
3126    ShiftSize = 0;
3127    if (Offset < 0) {
3128      MaxEncoding = 32;
3129      Sign = -1;
3130      Offset = -Offset;
3131    }
3132    break;
3133  default:
3134    llvm_unreachable("Unsupported opcode");
3135  }
3136
3137  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3138  // scratch register.  If DestReg is a virtual register, use it as the
3139  // scratch register; otherwise, create a new virtual register (to be
3140  // replaced by the scavenger at the end of PEI).  That case can be optimized
3141  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3142  // register can be loaded with offset%8 and the add/sub can use an extending
3143  // instruction with LSL#3.
3144  // Currently the function handles any offsets but generates a poor sequence
3145  // of code.
3146  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3147
3148  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
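  // The loop below peels off the largest encodable chunk per iteration,
  // preferring the shifted (LSL #12) form for large values. Illustrative
  // sketch (actual registers depend on the caller): with Opc == ADDXri and
  // Offset == 0x123456 it emits
  //   add dst, src, #0x123, lsl #12
  //   add dst, dst, #0x456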
3149  do {
3150    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3151    unsigned LocalShiftSize = 0;
3152    if (ThisVal > MaxEncoding) {
3153      ThisVal = ThisVal >> ShiftSize;
3154      LocalShiftSize = ShiftSize;
3155    }
3156    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3157           "Encoding cannot handle value that big");
3158    auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3159                   .addReg(SrcReg)
3160                   .addImm(Sign * (int)ThisVal);
3161    if (ShiftSize)
3162      MBI = MBI.addImm(
3163          AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
3164    MBI = MBI.setMIFlag(Flag);
3165
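    // For Windows unwind info, mirror the adjustment with the matching SEH
    // pseudo: SEH_SetFP/SEH_AddFP when moving between SP and FP, and
    // SEH_StackAlloc for plain SP adjustments.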
3166    if (NeedsWinCFI) {
3167      assert(Sign == 1 && "SEH directives should always have a positive sign");
3168      int Imm = (int)(ThisVal << LocalShiftSize);
3169      if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3170          (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3171        if (HasWinCFI)
3172          *HasWinCFI = true;
3173        if (Imm == 0)
3174          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
3175        else
3176          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
3177              .addImm(Imm)
3178              .setMIFlag(Flag);
3179        assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
3180                                      "emit a single SEH directive");
3181      } else if (DestReg == AArch64::SP) {
3182        if (HasWinCFI)
3183          *HasWinCFI = true;
3184        assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
3185        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3186            .addImm(Imm)
3187            .setMIFlag(Flag);
3188      }
3189      if (HasWinCFI)
3190        *HasWinCFI = true;
3191    }
3192
3193    SrcReg = DestReg;
3194    Offset -= ThisVal << LocalShiftSize;
3195  } while (Offset);
3196}
3197
3198void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3199                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3200                           unsigned DestReg, unsigned SrcReg,
3201                           StackOffset Offset, const TargetInstrInfo *TII,
3202                           MachineInstr::MIFlag Flag, bool SetNZCV,
3203                           bool NeedsWinCFI, bool *HasWinCFI) {
3204  int64_t Bytes, NumPredicateVectors, NumDataVectors;
3205  Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
3206
3207  // First emit non-scalable frame offsets, or a simple 'mov'.
3208  if (Bytes || (!Offset && SrcReg != DestReg)) {
3209    assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
3210           "SP increment/decrement not 16-byte aligned");
3211    unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
3212    if (Bytes < 0) {
3213      Bytes = -Bytes;
3214      Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
3215    }
3216    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
3217                       NeedsWinCFI, HasWinCFI);
3218    SrcReg = DestReg;
3219  }
3220
3221  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
3222         "SetNZCV not supported with SVE vectors");
3223  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
3224         "WinCFI not supported with SVE vectors");
3225
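  // Now handle the scalable part: ADDVL_XXI steps in whole vector lengths
  // (VL bytes per unit) and ADDPL_XXI in predicate lengths (VL/8 bytes per
  // unit).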
3226  if (NumDataVectors) {
3227    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
3228                       AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3229    SrcReg = DestReg;
3230  }
3231
3232  if (NumPredicateVectors) {
3233    assert(DestReg != AArch64::SP && "Unaligned access to SP");
3234    emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
3235                       AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3236  }
3237}
3238
3239MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3240    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3241    MachineBasicBlock::iterator InsertPt, int FrameIndex,
3242    LiveIntervals *LIS, VirtRegMap *VRM) const {
3243  // This is a bit of a hack. Consider this instruction:
3244  //
3245  //   %0 = COPY %sp; GPR64all:%0
3246  //
3247  // We explicitly chose GPR64all for the virtual register so such a copy might
3248  // be eliminated by RegisterCoalescer. However, that may not be possible, and
3249  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3250  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3251  //
3252  // To prevent that, we are going to constrain the %0 register class here.
3253  //
3254  // <rdar://problem/11522048>
3255  //
3256  if (MI.isFullCopy()) {
3257    Register DstReg = MI.getOperand(0).getReg();
3258    Register SrcReg = MI.getOperand(1).getReg();
3259    if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
3260      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3261      return nullptr;
3262    }
3263    if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
3264      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3265      return nullptr;
3266    }
3267  }
3268
3269  // Handle the case where a copy is being spilled or filled but the source
3270  // and destination register class don't match.  For example:
3271  //
3272  //   %0 = COPY %xzr; GPR64common:%0
3273  //
3274  // In this case we can still safely fold away the COPY and generate the
3275  // following spill code:
3276  //
3277  //   STRXui %xzr, %stack.0
3278  //
3279  // This also eliminates spilled cross register class COPYs (e.g. between x and
3280  // d regs) of the same size.  For example:
3281  //
3282  //   %0 = COPY %1; GPR64:%0, FPR64:%1
3283  //
3284  // will be filled as
3285  //
3286  //   LDRDui %0, fi<#0>
3287  //
3288  // instead of
3289  //
3290  //   LDRXui %Temp, fi<#0>
3291  //   %0 = FMOV %Temp
3292  //
3293  if (MI.isCopy() && Ops.size() == 1 &&
3294      // Make sure we're only folding the explicit COPY defs/uses.
3295      (Ops[0] == 0 || Ops[0] == 1)) {
3296    bool IsSpill = Ops[0] == 0;
3297    bool IsFill = !IsSpill;
3298    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3299    const MachineRegisterInfo &MRI = MF.getRegInfo();
3300    MachineBasicBlock &MBB = *MI.getParent();
3301    const MachineOperand &DstMO = MI.getOperand(0);
3302    const MachineOperand &SrcMO = MI.getOperand(1);
3303    Register DstReg = DstMO.getReg();
3304    Register SrcReg = SrcMO.getReg();
3305    // This is slightly expensive to compute for physical regs since
3306    // getMinimalPhysRegClass is slow.
3307    auto getRegClass = [&](unsigned Reg) {
3308      return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
3309                                              : TRI.getMinimalPhysRegClass(Reg);
3310    };
3311
3312    if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3313      assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3314                 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3315             "Mismatched register size in non subreg COPY");
3316      if (IsSpill)
3317        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3318                            getRegClass(SrcReg), &TRI);
3319      else
3320        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3321                             getRegClass(DstReg), &TRI);
3322      return &*--InsertPt;
3323    }
3324
3325    // Handle cases like spilling def of:
3326    //
3327    //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3328    //
3329    // where the physical register source can be widened and stored to the full
3330    // virtual reg destination stack slot, in this case producing:
3331    //
3332    //   STRXui %xzr, %stack.0
3333    //
3334    if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
3335      assert(SrcMO.getSubReg() == 0 &&
3336             "Unexpected subreg on physical register");
3337      const TargetRegisterClass *SpillRC;
3338      unsigned SpillSubreg;
3339      switch (DstMO.getSubReg()) {
3340      default:
3341        SpillRC = nullptr;
3342        break;
3343      case AArch64::sub_32:
3344      case AArch64::ssub:
3345        if (AArch64::GPR32RegClass.contains(SrcReg)) {
3346          SpillRC = &AArch64::GPR64RegClass;
3347          SpillSubreg = AArch64::sub_32;
3348        } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3349          SpillRC = &AArch64::FPR64RegClass;
3350          SpillSubreg = AArch64::ssub;
3351        } else
3352          SpillRC = nullptr;
3353        break;
3354      case AArch64::dsub:
3355        if (AArch64::FPR64RegClass.contains(SrcReg)) {
3356          SpillRC = &AArch64::FPR128RegClass;
3357          SpillSubreg = AArch64::dsub;
3358        } else
3359          SpillRC = nullptr;
3360        break;
3361      }
3362
3363      if (SpillRC)
3364        if (unsigned WidenedSrcReg =
3365                TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3366          storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3367                              FrameIndex, SpillRC, &TRI);
3368          return &*--InsertPt;
3369        }
3370    }
3371
3372    // Handle cases like filling use of:
3373    //
3374    //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3375    //
3376    // where we can load the full virtual reg source stack slot into the
3377    // subreg destination, in this case producing:
3378    //
3379    //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3380    //
3381    if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3382      const TargetRegisterClass *FillRC;
3383      switch (DstMO.getSubReg()) {
3384      default:
3385        FillRC = nullptr;
3386        break;
3387      case AArch64::sub_32:
3388        FillRC = &AArch64::GPR32RegClass;
3389        break;
3390      case AArch64::ssub:
3391        FillRC = &AArch64::FPR32RegClass;
3392        break;
3393      case AArch64::dsub:
3394        FillRC = &AArch64::FPR64RegClass;
3395        break;
3396      }
3397
3398      if (FillRC) {
3399        assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3400                   TRI.getRegSizeInBits(*FillRC) &&
3401               "Mismatched regclass size on folded subreg COPY");
3402        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3403        MachineInstr &LoadMI = *--InsertPt;
3404        MachineOperand &LoadDst = LoadMI.getOperand(0);
3405        assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3406        LoadDst.setSubReg(DstMO.getSubReg());
3407        LoadDst.setIsUndef();
3408        return &LoadMI;
3409      }
3410    }
3411  }
3412
3413  // Cannot fold.
3414  return nullptr;
3415}
3416
3417static bool isSVEScaledImmInstruction(unsigned Opcode) {
3418  switch (Opcode) {
3419  case AArch64::LDR_ZXI:
3420  case AArch64::STR_ZXI:
3421  case AArch64::LDR_PXI:
3422  case AArch64::STR_PXI:
3423    return true;
3424  default:
3425    return false;
3426  }
3427}
3428
3429int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
3430                                    StackOffset &SOffset,
3431                                    bool *OutUseUnscaledOp,
3432                                    unsigned *OutUnscaledOp,
3433                                    int64_t *EmittableOffset) {
3434  // Set output values in case of early exit.
3435  if (EmittableOffset)
3436    *EmittableOffset = 0;
3437  if (OutUseUnscaledOp)
3438    *OutUseUnscaledOp = false;
3439  if (OutUnscaledOp)
3440    *OutUnscaledOp = 0;
3441
3442  // Exit early for structured vector spills/fills as they can't take an
3443  // immediate offset.
3444  switch (MI.getOpcode()) {
3445  default:
3446    break;
3447  case AArch64::LD1Twov2d:
3448  case AArch64::LD1Threev2d:
3449  case AArch64::LD1Fourv2d:
3450  case AArch64::LD1Twov1d:
3451  case AArch64::LD1Threev1d:
3452  case AArch64::LD1Fourv1d:
3453  case AArch64::ST1Twov2d:
3454  case AArch64::ST1Threev2d:
3455  case AArch64::ST1Fourv2d:
3456  case AArch64::ST1Twov1d:
3457  case AArch64::ST1Threev1d:
3458  case AArch64::ST1Fourv1d:
3459  case AArch64::IRG:
3460  case AArch64::IRGstack:
3461    return AArch64FrameOffsetCannotUpdate;
3462  }
3463
3464  // Get the min/max offset and the scale.
3465  unsigned Scale, Width;
3466  int64_t MinOff, MaxOff;
3467  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
3468                                      MaxOff))
3469    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3470
3471  // Construct the complete offset.
3472  bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
3473  int64_t Offset =
3474      IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
3475
3476  const MachineOperand &ImmOpnd =
3477      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3478  Offset += ImmOpnd.getImm() * Scale;
3479
3480  // If the offset doesn't match the scale, rewrite the instruction to use
3481  // the unscaled instruction instead; likewise if the offset is negative
3482  // and an unscaled op is available.
3483  Optional<unsigned> UnscaledOp =
3484      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3485  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3486  if (useUnscaledOp &&
3487      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
3488    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3489
3490  int64_t Remainder = Offset % Scale;
3491  assert(!(Remainder && useUnscaledOp) &&
3492         "Cannot have remainder when using unscaled op");
3493
3494  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3495  int64_t NewOffset = Offset / Scale;
3496  if (MinOff <= NewOffset && NewOffset <= MaxOff)
3497    Offset = Remainder;
3498  else {
3499    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3500    Offset = Offset - NewOffset * Scale + Remainder;
3501  }
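  // At this point NewOffset is what the instruction's immediate field can
  // encode; any residual left in Offset is folded back into SOffset below for
  // the caller to materialize separately.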
3502
3503  if (EmittableOffset)
3504    *EmittableOffset = NewOffset;
3505  if (OutUseUnscaledOp)
3506    *OutUseUnscaledOp = useUnscaledOp;
3507  if (OutUnscaledOp && UnscaledOp)
3508    *OutUnscaledOp = *UnscaledOp;
3509
3510  if (IsMulVL)
3511    SOffset = StackOffset(Offset, MVT::nxv1i8) +
3512              StackOffset(SOffset.getBytes(), MVT::i8);
3513  else
3514    SOffset = StackOffset(Offset, MVT::i8) +
3515              StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
3516  return AArch64FrameOffsetCanUpdate |
3517         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
3518}
3519
3520bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3521                                    unsigned FrameReg, StackOffset &Offset,
3522                                    const AArch64InstrInfo *TII) {
3523  unsigned Opcode = MI.getOpcode();
3524  unsigned ImmIdx = FrameRegIdx + 1;
3525
3526  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3527    Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
3528    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3529                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3530                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3531    MI.eraseFromParent();
3532    Offset = StackOffset();
3533    return true;
3534  }
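  // Otherwise this is a load/store: ask isAArch64FrameOffsetLegal how much of
  // the offset the immediate field can absorb (possibly switching to the
  // unscaled form) and rewrite the operands in place.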
3535
3536  int64_t NewOffset;
3537  unsigned UnscaledOp;
3538  bool UseUnscaledOp;
3539  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3540                                         &UnscaledOp, &NewOffset);
3541  if (Status & AArch64FrameOffsetCanUpdate) {
3542    if (Status & AArch64FrameOffsetIsLegal)
3543      // Replace the FrameIndex with FrameReg.
3544      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3545    if (UseUnscaledOp)
3546      MI.setDesc(TII->get(UnscaledOp));
3547
3548    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3549    return !Offset;
3550  }
3551
3552  return false;
3553}
3554
3555void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
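  // HINT #0 is the architectural NOP alias.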
3556  NopInst.setOpcode(AArch64::HINT);
3557  NopInst.addOperand(MCOperand::createImm(0));
3558}
3559
3560// AArch64 supports MachineCombiner.
3561bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3562
3563// True when Opc sets the NZCV flags.
3564static bool isCombineInstrSettingFlag(unsigned Opc) {
3565  switch (Opc) {
3566  case AArch64::ADDSWrr:
3567  case AArch64::ADDSWri:
3568  case AArch64::ADDSXrr:
3569  case AArch64::ADDSXri:
3570  case AArch64::SUBSWrr:
3571  case AArch64::SUBSXrr:
3572  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - Wn*Wm, not Wd = Wn*Wm - Wi.
3573  case AArch64::SUBSWri:
3574  case AArch64::SUBSXri:
3575    return true;
3576  default:
3577    break;
3578  }
3579  return false;
3580}
3581
3582// 32b Opcodes that can be combined with a MUL
3583static bool isCombineInstrCandidate32(unsigned Opc) {
3584  switch (Opc) {
3585  case AArch64::ADDWrr:
3586  case AArch64::ADDWri:
3587  case AArch64::SUBWrr:
3588  case AArch64::ADDSWrr:
3589  case AArch64::ADDSWri:
3590  case AArch64::SUBSWrr:
3591  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - Wn*Wm, not Wd = Wn*Wm - Wi.
3592  case AArch64::SUBWri:
3593  case AArch64::SUBSWri:
3594    return true;
3595  default:
3596    break;
3597  }
3598  return false;
3599}
3600
3601// 64b Opcodes that can be combined with a MUL
3602static bool isCombineInstrCandidate64(unsigned Opc) {
3603  switch (Opc) {
3604  case AArch64::ADDXrr:
3605  case AArch64::ADDXri:
3606  case AArch64::SUBXrr:
3607  case AArch64::ADDSXrr:
3608  case AArch64::ADDSXri:
3609  case AArch64::SUBSXrr:
3610  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - Wn*Wm, not Wd = Wn*Wm - Wi.
3611  case AArch64::SUBXri:
3612  case AArch64::SUBSXri:
3613  case AArch64::ADDv8i8:
3614  case AArch64::ADDv16i8:
3615  case AArch64::ADDv4i16:
3616  case AArch64::ADDv8i16:
3617  case AArch64::ADDv2i32:
3618  case AArch64::ADDv4i32:
3619  case AArch64::SUBv8i8:
3620  case AArch64::SUBv16i8:
3621  case AArch64::SUBv4i16:
3622  case AArch64::SUBv8i16:
3623  case AArch64::SUBv2i32:
3624  case AArch64::SUBv4i32:
3625    return true;
3626  default:
3627    break;
3628  }
3629  return false;
3630}
3631
3632// FP Opcodes that can be combined with a FMUL
3633static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3634  switch (Inst.getOpcode()) {
3635  default:
3636    break;
3637  case AArch64::FADDHrr:
3638  case AArch64::FADDSrr:
3639  case AArch64::FADDDrr:
3640  case AArch64::FADDv4f16:
3641  case AArch64::FADDv8f16:
3642  case AArch64::FADDv2f32:
3643  case AArch64::FADDv2f64:
3644  case AArch64::FADDv4f32:
3645  case AArch64::FSUBHrr:
3646  case AArch64::FSUBSrr:
3647  case AArch64::FSUBDrr:
3648  case AArch64::FSUBv4f16:
3649  case AArch64::FSUBv8f16:
3650  case AArch64::FSUBv2f32:
3651  case AArch64::FSUBv2f64:
3652  case AArch64::FSUBv4f32:
3653    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3654    return (Options.UnsafeFPMath ||
3655            Options.AllowFPOpFusion == FPOpFusion::Fast);
3656  }
3657  return false;
3658}
3659
3660// Opcodes that can be combined with a MUL
3661static bool isCombineInstrCandidate(unsigned Opc) {
3662  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3663}
3664
3665//
3666// Utility routine that checks if \param MO is defined by an
3667// \param CombineOpc instruction in the basic block \param MBB
3668static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3669                       unsigned CombineOpc, unsigned ZeroReg = 0,
3670                       bool CheckZeroReg = false) {
3671  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3672  MachineInstr *MI = nullptr;
3673
3674  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3675    MI = MRI.getUniqueVRegDef(MO.getReg());
3676  // And it needs to be in the trace (otherwise, it won't have a depth).
3677  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3678    return false;
3679  // The result must only be used by the instruction we combine with.
3680  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3681    return false;
3682
3683  if (CheckZeroReg) {
3684    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3685           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3686           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3687    // The third input reg must be zero.
3688    if (MI->getOperand(3).getReg() != ZeroReg)
3689      return false;
3690  }
3691
3692  return true;
3693}
3694
3695//
3696// Is \param MO defined by an integer multiply, and can it be combined?
3697static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3698                              unsigned MulOpc, unsigned ZeroReg) {
3699  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3700}
3701
3702//
3703// Is \param MO defined by a floating-point multiply, and can it be combined?
3704static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3705                               unsigned MulOpc) {
3706  return canCombine(MBB, MO, MulOpc);
3707}
3708
3709// TODO: There are many more machine instruction opcodes to match:
3710//       1. Other data types (integer, vectors)
3711//       2. Other math / logic operations (xor, or)
3712//       3. Other forms of the same operation (intrinsics and other variants)
3713bool AArch64InstrInfo::isAssociativeAndCommutative(
3714    const MachineInstr &Inst) const {
3715  switch (Inst.getOpcode()) {
3716  case AArch64::FADDDrr:
3717  case AArch64::FADDSrr:
3718  case AArch64::FADDv2f32:
3719  case AArch64::FADDv2f64:
3720  case AArch64::FADDv4f32:
3721  case AArch64::FMULDrr:
3722  case AArch64::FMULSrr:
3723  case AArch64::FMULX32:
3724  case AArch64::FMULX64:
3725  case AArch64::FMULXv2f32:
3726  case AArch64::FMULXv2f64:
3727  case AArch64::FMULXv4f32:
3728  case AArch64::FMULv2f32:
3729  case AArch64::FMULv2f64:
3730  case AArch64::FMULv4f32:
3731    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3732  default:
3733    return false;
3734  }
3735}
3736
3737/// Find instructions that can be turned into madd.
3738static bool getMaddPatterns(MachineInstr &Root,
3739                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3740  unsigned Opc = Root.getOpcode();
3741  MachineBasicBlock &MBB = *Root.getParent();
3742  bool Found = false;
3743
3744  if (!isCombineInstrCandidate(Opc))
3745    return false;
3746  if (isCombineInstrSettingFlag(Opc)) {
3747    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3748    // When NZCV is live, bail out.
3749    if (Cmp_NZCV == -1)
3750      return false;
3751    unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3752    // When the opcode can't change, bail out.
3753    // CHECKME: do we miss any cases for opcode conversion?
3754    if (NewOpc == Opc)
3755      return false;
3756    Opc = NewOpc;
3757  }
3758
3759  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
3760                      MachineCombinerPattern Pattern) {
3761    if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
3762      Patterns.push_back(Pattern);
3763      Found = true;
3764    }
3765  };
3766
3767  auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
3768    if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
3769      Patterns.push_back(Pattern);
3770      Found = true;
3771    }
3772  };
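  // For example (roughly, in MIR terms), given
  //   %3:gpr32 = MADDWrrr %1, %2, $wzr   ; a plain MUL in canonical form
  //   %5:gpr32 = ADDWrr %3, %4
  // setFound(MADDWrrr, 1, WZR, MULADDW_OP1) records that the multiply feeding
  // operand 1 of the ADD can be folded into a single MADD.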
3773
3774  using MCP = MachineCombinerPattern;
3775
3776  switch (Opc) {
3777  default:
3778    break;
3779  case AArch64::ADDWrr:
3780    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3781           "ADDWrr does not have register operands");
3782    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
3783    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
3784    break;
3785  case AArch64::ADDXrr:
3786    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
3787    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
3788    break;
3789  case AArch64::SUBWrr:
3790    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
3791    setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
3792    break;
3793  case AArch64::SUBXrr:
3794    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
3795    setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
3796    break;
3797  case AArch64::ADDWri:
3798    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
3799    break;
3800  case AArch64::ADDXri:
3801    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
3802    break;
3803  case AArch64::SUBWri:
3804    setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
3805    break;
3806  case AArch64::SUBXri:
3807    setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
3808    break;
3809  case AArch64::ADDv8i8:
3810    setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
3811    setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
3812    break;
3813  case AArch64::ADDv16i8:
3814    setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
3815    setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
3816    break;
3817  case AArch64::ADDv4i16:
3818    setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
3819    setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
3820    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
3821    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
3822    break;
3823  case AArch64::ADDv8i16:
3824    setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
3825    setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
3826    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
3827    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
3828    break;
3829  case AArch64::ADDv2i32:
3830    setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
3831    setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
3832    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
3833    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
3834    break;
3835  case AArch64::ADDv4i32:
3836    setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
3837    setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
3838    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
3839    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
3840    break;
3841  case AArch64::SUBv8i8:
3842    setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
3843    setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
3844    break;
3845  case AArch64::SUBv16i8:
3846    setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
3847    setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
3848    break;
3849  case AArch64::SUBv4i16:
3850    setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
3851    setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
3852    setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
3853    setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
3854    break;
3855  case AArch64::SUBv8i16:
3856    setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
3857    setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
3858    setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
3859    setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
3860    break;
3861  case AArch64::SUBv2i32:
3862    setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
3863    setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
3864    setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
3865    setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
3866    break;
3867  case AArch64::SUBv4i32:
3868    setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
3869    setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
3870    setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
3871    setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
3872    break;
3873  }
3874  return Found;
3875}

3876/// Floating-Point Support
3877
3878/// Find instructions that can be turned into fmadd.
3879static bool getFMAPatterns(MachineInstr &Root,
3880                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3881
3882  if (!isCombineInstrCandidateFP(Root))
3883    return false;
3884
3885  MachineBasicBlock &MBB = *Root.getParent();
3886  bool Found = false;
3887
3888  auto Match = [&](int Opcode, int Operand,
3889                   MachineCombinerPattern Pattern) -> bool {
3890    if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
3891      Patterns.push_back(Pattern);
3892      return true;
3893    }
3894    return false;
3895  };
3896
3897  using MCP = MachineCombinerPattern;
3898
3899  switch (Root.getOpcode()) {
3900  default:
3901    assert(false && "Unsupported FP instruction in combiner\n");
3902    break;
3903  case AArch64::FADDHrr:
3904    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3905           "FADDHrr does not have register operands");
3906
3907    Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
3908    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
3909    break;
3910  case AArch64::FADDSrr:
3911    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3912           "FADDSrr does not have register operands");
3913
3914    Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
3915             Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
3916
3917    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
3918             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
3919    break;
3920  case AArch64::FADDDrr:
3921    Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
3922             Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
3923
3924    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
3925             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
3926    break;
3927  case AArch64::FADDv4f16:
3928    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
3929             Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
3930
3931    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
3932             Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
3933    break;
3934  case AArch64::FADDv8f16:
3935    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
3936             Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
3937
3938    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
3939             Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
3940    break;
3941  case AArch64::FADDv2f32:
3942    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
3943             Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
3944
3945    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
3946             Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
3947    break;
3948  case AArch64::FADDv2f64:
3949    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
3950             Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
3951
3952    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
3953             Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
3954    break;
3955  case AArch64::FADDv4f32:
3956    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
3957             Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
3958
3959    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
3960             Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
3961    break;
3962  case AArch64::FSUBHrr:
3963    Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
3964    Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
3965    Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
3966    break;
3967  case AArch64::FSUBSrr:
3968    Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
3969
3970    Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
3971             Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
3972
3973    Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
3974    break;
3975  case AArch64::FSUBDrr:
3976    Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
3977
3978    Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
3979             Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
3980
3981    Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
3982    break;
3983  case AArch64::FSUBv4f16:
3984    Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
3985             Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
3986
3987    Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
3988             Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
3989    break;
3990  case AArch64::FSUBv8f16:
3991    Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
3992             Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
3993
3994    Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
3995             Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
3996    break;
3997  case AArch64::FSUBv2f32:
3998    Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
3999             Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4000
4001    Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4002             Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4003    break;
4004  case AArch64::FSUBv2f64:
4005    Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4006             Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4007
4008    Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4009             Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4010    break;
4011  case AArch64::FSUBv4f32:
4012    Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4013             Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4014
4015    Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4016             Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4017    break;
4018  }
4019  return Found;
4020}
4021
4022/// Return true when a code sequence can improve throughput. It
4023/// should be called only for instructions in loops.
4024/// \param Pattern - combiner pattern
4025bool AArch64InstrInfo::isThroughputPattern(
4026    MachineCombinerPattern Pattern) const {
4027  switch (Pattern) {
4028  default:
4029    break;
4030  case MachineCombinerPattern::FMULADDH_OP1:
4031  case MachineCombinerPattern::FMULADDH_OP2:
4032  case MachineCombinerPattern::FMULSUBH_OP1:
4033  case MachineCombinerPattern::FMULSUBH_OP2:
4034  case MachineCombinerPattern::FMULADDS_OP1:
4035  case MachineCombinerPattern::FMULADDS_OP2:
4036  case MachineCombinerPattern::FMULSUBS_OP1:
4037  case MachineCombinerPattern::FMULSUBS_OP2:
4038  case MachineCombinerPattern::FMULADDD_OP1:
4039  case MachineCombinerPattern::FMULADDD_OP2:
4040  case MachineCombinerPattern::FMULSUBD_OP1:
4041  case MachineCombinerPattern::FMULSUBD_OP2:
4042  case MachineCombinerPattern::FNMULSUBH_OP1:
4043  case MachineCombinerPattern::FNMULSUBS_OP1:
4044  case MachineCombinerPattern::FNMULSUBD_OP1:
4045  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4046  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4047  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4048  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4049  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4050  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4051  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4052  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4053  case MachineCombinerPattern::FMLAv4f16_OP2:
4054  case MachineCombinerPattern::FMLAv4f16_OP1:
4055  case MachineCombinerPattern::FMLAv8f16_OP1:
4056  case MachineCombinerPattern::FMLAv8f16_OP2:
4057  case MachineCombinerPattern::FMLAv2f32_OP2:
4058  case MachineCombinerPattern::FMLAv2f32_OP1:
4059  case MachineCombinerPattern::FMLAv2f64_OP1:
4060  case MachineCombinerPattern::FMLAv2f64_OP2:
4061  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4062  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4063  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4064  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4065  case MachineCombinerPattern::FMLAv4f32_OP1:
4066  case MachineCombinerPattern::FMLAv4f32_OP2:
4067  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4068  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4069  case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
4070  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
4071  case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
4072  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
4073  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4074  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4075  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4076  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4077  case MachineCombinerPattern::FMLSv4f16_OP1:
4078  case MachineCombinerPattern::FMLSv4f16_OP2:
4079  case MachineCombinerPattern::FMLSv8f16_OP1:
4080  case MachineCombinerPattern::FMLSv8f16_OP2:
4081  case MachineCombinerPattern::FMLSv2f32_OP2:
4082  case MachineCombinerPattern::FMLSv2f64_OP2:
4083  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4084  case MachineCombinerPattern::FMLSv4f32_OP2:
4085  case MachineCombinerPattern::MULADDv8i8_OP1:
4086  case MachineCombinerPattern::MULADDv8i8_OP2:
4087  case MachineCombinerPattern::MULADDv16i8_OP1:
4088  case MachineCombinerPattern::MULADDv16i8_OP2:
4089  case MachineCombinerPattern::MULADDv4i16_OP1:
4090  case MachineCombinerPattern::MULADDv4i16_OP2:
4091  case MachineCombinerPattern::MULADDv8i16_OP1:
4092  case MachineCombinerPattern::MULADDv8i16_OP2:
4093  case MachineCombinerPattern::MULADDv2i32_OP1:
4094  case MachineCombinerPattern::MULADDv2i32_OP2:
4095  case MachineCombinerPattern::MULADDv4i32_OP1:
4096  case MachineCombinerPattern::MULADDv4i32_OP2:
4097  case MachineCombinerPattern::MULSUBv8i8_OP1:
4098  case MachineCombinerPattern::MULSUBv8i8_OP2:
4099  case MachineCombinerPattern::MULSUBv16i8_OP1:
4100  case MachineCombinerPattern::MULSUBv16i8_OP2:
4101  case MachineCombinerPattern::MULSUBv4i16_OP1:
4102  case MachineCombinerPattern::MULSUBv4i16_OP2:
4103  case MachineCombinerPattern::MULSUBv8i16_OP1:
4104  case MachineCombinerPattern::MULSUBv8i16_OP2:
4105  case MachineCombinerPattern::MULSUBv2i32_OP1:
4106  case MachineCombinerPattern::MULSUBv2i32_OP2:
4107  case MachineCombinerPattern::MULSUBv4i32_OP1:
4108  case MachineCombinerPattern::MULSUBv4i32_OP2:
4109  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4110  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4111  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4112  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4113  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4114  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4115  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4116  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4117  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4118  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4119  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4120  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4121  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4122  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4123  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4124  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4125    return true;
4126  } // end switch (Pattern)
4127  return false;
4128}

4129/// Return true when there is potentially a faster code sequence for an
4130/// instruction chain ending in \p Root. All potential patterns are listed in
4131/// the \p Patterns vector. Patterns should be sorted in priority order, since
4132/// the pattern evaluator stops checking as soon as it finds a faster sequence.
4134bool AArch64InstrInfo::getMachineCombinerPatterns(
4135    MachineInstr &Root,
4136    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4137  // Integer patterns
4138  if (getMaddPatterns(Root, Patterns))
4139    return true;
4140  // Floating point patterns
4141  if (getFMAPatterns(Root, Patterns))
4142    return true;
4143
4144  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4145}
4146
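// Operand ordering used when building the fused instruction: Default places
// the addend last (MADD-style); Indexed and Accumulator place it first, with
// Indexed also copying the lane immediate from the original multiply.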
4147enum class FMAInstKind { Default, Indexed, Accumulator };
4148/// genFusedMultiply - Generate fused multiply instructions.
4149/// This function supports both integer and floating point instructions.
4150/// A typical example:
4151///  F|MUL I=A,B,0
4152///  F|ADD R,I,C
4153///  ==> F|MADD R,A,B,C
4154/// \param MF Containing MachineFunction
4155/// \param MRI Register information
4156/// \param TII Target information
4157/// \param Root is the F|ADD instruction
4158/// \param [out] InsInstrs is a vector of machine instructions and will
4159/// contain the generated madd instruction
4160/// \param IdxMulOpd is index of operand in Root that is the result of
4161/// the F|MUL. In the example above IdxMulOpd is 1.
4162/// \param MaddOpc the opcode of the f|madd instruction
4163/// \param RC Register class of operands
4164/// \param kind the kind of fma instruction (addressing mode) to generate
4165/// \param ReplacedAddend is the result register from the instruction
4166/// replacing the non-combined operand, if any.
4167static MachineInstr *
4168genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4169                 const TargetInstrInfo *TII, MachineInstr &Root,
4170                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4171                 unsigned MaddOpc, const TargetRegisterClass *RC,
4172                 FMAInstKind kind = FMAInstKind::Default,
4173                 const Register *ReplacedAddend = nullptr) {
4174  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4175
4176  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4177  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4178  Register ResultReg = Root.getOperand(0).getReg();
4179  Register SrcReg0 = MUL->getOperand(1).getReg();
4180  bool Src0IsKill = MUL->getOperand(1).isKill();
4181  Register SrcReg1 = MUL->getOperand(2).getReg();
4182  bool Src1IsKill = MUL->getOperand(2).isKill();
4183
4184  unsigned SrcReg2;
4185  bool Src2IsKill;
4186  if (ReplacedAddend) {
4187    // If we just generated a new addend, we must be its only use.
4188    SrcReg2 = *ReplacedAddend;
4189    Src2IsKill = true;
4190  } else {
4191    SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4192    Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4193  }
4194
4195  if (Register::isVirtualRegister(ResultReg))
4196    MRI.constrainRegClass(ResultReg, RC);
4197  if (Register::isVirtualRegister(SrcReg0))
4198    MRI.constrainRegClass(SrcReg0, RC);
4199  if (Register::isVirtualRegister(SrcReg1))
4200    MRI.constrainRegClass(SrcReg1, RC);
4201  if (Register::isVirtualRegister(SrcReg2))
4202    MRI.constrainRegClass(SrcReg2, RC);
4203
4204  MachineInstrBuilder MIB;
4205  if (kind == FMAInstKind::Default)
4206    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4207              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4208              .addReg(SrcReg1, getKillRegState(Src1IsKill))
4209              .addReg(SrcReg2, getKillRegState(Src2IsKill));
4210  else if (kind == FMAInstKind::Indexed)
4211    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4212              .addReg(SrcReg2, getKillRegState(Src2IsKill))
4213              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4214              .addReg(SrcReg1, getKillRegState(Src1IsKill))
4215              .addImm(MUL->getOperand(3).getImm());
4216  else if (kind == FMAInstKind::Accumulator)
4217    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4218              .addReg(SrcReg2, getKillRegState(Src2IsKill))
4219              .addReg(SrcReg0, getKillRegState(Src0IsKill))
4220              .addReg(SrcReg1, getKillRegState(Src1IsKill));
4221  else
4222    assert(false && "Invalid FMA instruction kind \n");
4223  // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
4224  InsInstrs.push_back(MIB);
4225  return MUL;
4226}
4227
4228/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4229/// instructions.
4230///
4231/// \see genFusedMultiply
4232static MachineInstr *genFusedMultiplyAcc(
4233    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4234    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4235    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4236  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4237                          FMAInstKind::Accumulator);
4238}
4239
4240/// genNeg - Helper to generate an intermediate negation of the second operand
4241/// of Root
4242static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
4243                       const TargetInstrInfo *TII, MachineInstr &Root,
4244                       SmallVectorImpl<MachineInstr *> &InsInstrs,
4245                       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
4246                       unsigned MnegOpc, const TargetRegisterClass *RC) {
4247  Register NewVR = MRI.createVirtualRegister(RC);
4248  MachineInstrBuilder MIB =
4249      BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
4250          .add(Root.getOperand(2));
4251  InsInstrs.push_back(MIB);
4252
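  // Record that NewVR is defined by the first instruction in InsInstrs so the
  // MachineCombiner can compute the depth of the new value when costing the
  // alternative sequence.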
4253  assert(InstrIdxForVirtReg.empty());
4254  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4255
4256  return NewVR;
4257}
4258
4259/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4260/// instructions with an additional negation of the accumulator
4261static MachineInstr *genFusedMultiplyAccNeg(
4262    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4263    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4264    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4265    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4266  assert(IdxMulOpd == 1);
4267
4268  Register NewVR =
4269      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4270  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4271                          FMAInstKind::Accumulator, &NewVR);
4272}
4273
4274/// genFusedMultiplyIdx - Helper to generate fused multiply-indexed
4275/// instructions.
4276///
4277/// \see genFusedMultiply
4278static MachineInstr *genFusedMultiplyIdx(
4279    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4280    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4281    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4282  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4283                          FMAInstKind::Indexed);
4284}
4285
4286/// genFusedMultiplyIdxNeg - Helper to generate fused multiply-indexed
4287/// instructions with an additional negation of the accumulator
4288static MachineInstr *genFusedMultiplyIdxNeg(
4289    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4290    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4291    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4292    unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4293  assert(IdxMulOpd == 1);
4294
4295  Register NewVR =
4296      genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4297
4298  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4299                          FMAInstKind::Indexed, &NewVR);
4300}
4301
4302/// genMaddR - Generate madd instruction and combine mul and add using
4303/// an extra virtual register
4304/// Example - an ADD intermediate needs to be stored in a register:
4305///   MUL I=A,B,0
4306///   ADD R,I,Imm
4307///   ==> ORR  V, ZR, Imm
4308///   ==> MADD R,A,B,V
4309/// \param MF Containing MachineFunction
4310/// \param MRI Register information
4311/// \param TII Target information
4312/// \param Root is the ADD instruction
4313/// \param [out] InsInstrs is a vector of machine instructions and will
4314/// contain the generated madd instruction
4315/// \param IdxMulOpd is index of operand in Root that is the result of
4316/// the MUL. In the example above IdxMulOpd is 1.
4317/// \param MaddOpc the opcode of the madd instruction
4318/// \param VR is a virtual register that holds the value of an ADD operand
4319/// (V in the example above).
4320/// \param RC Register class of operands
4321static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4322                              const TargetInstrInfo *TII, MachineInstr &Root,
4323                              SmallVectorImpl<MachineInstr *> &InsInstrs,
4324                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4325                              const TargetRegisterClass *RC) {
4326  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4327
4328  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4329  Register ResultReg = Root.getOperand(0).getReg();
4330  Register SrcReg0 = MUL->getOperand(1).getReg();
4331  bool Src0IsKill = MUL->getOperand(1).isKill();
4332  Register SrcReg1 = MUL->getOperand(2).getReg();
4333  bool Src1IsKill = MUL->getOperand(2).isKill();
4334
4335  if (Register::isVirtualRegister(ResultReg))
4336    MRI.constrainRegClass(ResultReg, RC);
4337  if (Register::isVirtualRegister(SrcReg0))
4338    MRI.constrainRegClass(SrcReg0, RC);
4339  if (Register::isVirtualRegister(SrcReg1))
4340    MRI.constrainRegClass(SrcReg1, RC);
4341  if (Register::isVirtualRegister(VR))
4342    MRI.constrainRegClass(VR, RC);
4343
4344  MachineInstrBuilder MIB =
4345      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4346          .addReg(SrcReg0, getKillRegState(Src0IsKill))
4347          .addReg(SrcReg1, getKillRegState(Src1IsKill))
4348          .addReg(VR);
4349  // Insert the MADD
4350  InsInstrs.push_back(MIB);
4351  return MUL;
4352}
4353
4354/// When getMachineCombinerPatterns() finds potential patterns,
4355/// this function generates the instructions that could replace the
4356/// original code sequence
4357void AArch64InstrInfo::genAlternativeCodeSequence(
4358    MachineInstr &Root, MachineCombinerPattern Pattern,
4359    SmallVectorImpl<MachineInstr *> &InsInstrs,
4360    SmallVectorImpl<MachineInstr *> &DelInstrs,
4361    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4362  MachineBasicBlock &MBB = *Root.getParent();
4363  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4364  MachineFunction &MF = *MBB.getParent();
4365  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4366
4367  MachineInstr *MUL;
4368  const TargetRegisterClass *RC;
4369  unsigned Opc;
4370  switch (Pattern) {
4371  default:
4372    // Reassociate instructions.
4373    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4374                                                DelInstrs, InstrIdxForVirtReg);
4375    return;
4376  case MachineCombinerPattern::MULADDW_OP1:
4377  case MachineCombinerPattern::MULADDX_OP1:
4378    // MUL I=A,B,0
4379    // ADD R,I,C
4380    // ==> MADD R,A,B,C
4381    // --- Create(MADD);
4382    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4383      Opc = AArch64::MADDWrrr;
4384      RC = &AArch64::GPR32RegClass;
4385    } else {
4386      Opc = AArch64::MADDXrrr;
4387      RC = &AArch64::GPR64RegClass;
4388    }
4389    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4390    break;
4391  case MachineCombinerPattern::MULADDW_OP2:
4392  case MachineCombinerPattern::MULADDX_OP2:
4393    // MUL I=A,B,0
4394    // ADD R,C,I
4395    // ==> MADD R,A,B,C
4396    // --- Create(MADD);
4397    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4398      Opc = AArch64::MADDWrrr;
4399      RC = &AArch64::GPR32RegClass;
4400    } else {
4401      Opc = AArch64::MADDXrrr;
4402      RC = &AArch64::GPR64RegClass;
4403    }
4404    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4405    break;
4406  case MachineCombinerPattern::MULADDWI_OP1:
4407  case MachineCombinerPattern::MULADDXI_OP1: {
4408    // MUL I=A,B,0
4409    // ADD R,I,Imm
4410    // ==> ORR  V, ZR, Imm
4411    // ==> MADD R,A,B,V
4412    // --- Create(MADD);
4413    const TargetRegisterClass *OrrRC;
4414    unsigned BitSize, OrrOpc, ZeroReg;
4415    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4416      OrrOpc = AArch64::ORRWri;
4417      OrrRC = &AArch64::GPR32spRegClass;
4418      BitSize = 32;
4419      ZeroReg = AArch64::WZR;
4420      Opc = AArch64::MADDWrrr;
4421      RC = &AArch64::GPR32RegClass;
4422    } else {
4423      OrrOpc = AArch64::ORRXri;
4424      OrrRC = &AArch64::GPR64spRegClass;
4425      BitSize = 64;
4426      ZeroReg = AArch64::XZR;
4427      Opc = AArch64::MADDXrrr;
4428      RC = &AArch64::GPR64RegClass;
4429    }
4430    Register NewVR = MRI.createVirtualRegister(OrrRC);
4431    uint64_t Imm = Root.getOperand(2).getImm();
4432
4433    if (Root.getOperand(3).isImm()) {
4434      unsigned Val = Root.getOperand(3).getImm();
4435      Imm = Imm << Val;
4436    }
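    // The (possibly shifted) immediate must be representable as a logical
    // immediate for ORR; if it cannot be encoded, no replacement is recorded.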
4437    uint64_t UImm = SignExtend64(Imm, BitSize);
4438    uint64_t Encoding;
4439    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4440      MachineInstrBuilder MIB1 =
4441          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4442              .addReg(ZeroReg)
4443              .addImm(Encoding);
4444      InsInstrs.push_back(MIB1);
4445      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4446      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4447    }
4448    break;
4449  }
4450  case MachineCombinerPattern::MULSUBW_OP1:
4451  case MachineCombinerPattern::MULSUBX_OP1: {
4452    // MUL I=A,B,0
4453    // SUB R,I, C
4454    // ==> SUB  V, 0, C
4455    // ==> MADD R,A,B,V // = -C + A*B
4456    // --- Create(MADD);
4457    const TargetRegisterClass *SubRC;
4458    unsigned SubOpc, ZeroReg;
4459    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4460      SubOpc = AArch64::SUBWrr;
4461      SubRC = &AArch64::GPR32spRegClass;
4462      ZeroReg = AArch64::WZR;
4463      Opc = AArch64::MADDWrrr;
4464      RC = &AArch64::GPR32RegClass;
4465    } else {
4466      SubOpc = AArch64::SUBXrr;
4467      SubRC = &AArch64::GPR64spRegClass;
4468      ZeroReg = AArch64::XZR;
4469      Opc = AArch64::MADDXrrr;
4470      RC = &AArch64::GPR64RegClass;
4471    }
4472    Register NewVR = MRI.createVirtualRegister(SubRC);
4473    // SUB NewVR, 0, C
4474    MachineInstrBuilder MIB1 =
4475        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4476            .addReg(ZeroReg)
4477            .add(Root.getOperand(2));
4478    InsInstrs.push_back(MIB1);
4479    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4480    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4481    break;
4482  }
4483  case MachineCombinerPattern::MULSUBW_OP2:
4484  case MachineCombinerPattern::MULSUBX_OP2:
4485    // MUL I=A,B,0
4486    // SUB R,C,I
4487    // ==> MSUB R,A,B,C (computes C - A*B)
4488    // --- Create(MSUB);
4489    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4490      Opc = AArch64::MSUBWrrr;
4491      RC = &AArch64::GPR32RegClass;
4492    } else {
4493      Opc = AArch64::MSUBXrrr;
4494      RC = &AArch64::GPR64RegClass;
4495    }
4496    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4497    break;
4498  case MachineCombinerPattern::MULSUBWI_OP1:
4499  case MachineCombinerPattern::MULSUBXI_OP1: {
4500    // MUL I=A,B,0
4501    // SUB R,I,Imm
4502    // ==> ORR  V, ZR, -Imm
4503    // ==> MADD R,A,B,V // = -Imm + A*B
4504    // --- Create(MADD);
4505    const TargetRegisterClass *OrrRC;
4506    unsigned BitSize, OrrOpc, ZeroReg;
4507    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4508      OrrOpc = AArch64::ORRWri;
4509      OrrRC = &AArch64::GPR32spRegClass;
4510      BitSize = 32;
4511      ZeroReg = AArch64::WZR;
4512      Opc = AArch64::MADDWrrr;
4513      RC = &AArch64::GPR32RegClass;
4514    } else {
4515      OrrOpc = AArch64::ORRXri;
4516      OrrRC = &AArch64::GPR64spRegClass;
4517      BitSize = 64;
4518      ZeroReg = AArch64::XZR;
4519      Opc = AArch64::MADDXrrr;
4520      RC = &AArch64::GPR64RegClass;
4521    }
4522    Register NewVR = MRI.createVirtualRegister(OrrRC);
4523    uint64_t Imm = Root.getOperand(2).getImm();
4524    if (Root.getOperand(3).isImm()) {
4525      unsigned Val = Root.getOperand(3).getImm();
4526      Imm = Imm << Val;
4527    }
4528    uint64_t UImm = SignExtend64(-Imm, BitSize);
4529    uint64_t Encoding;
4530    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4531      MachineInstrBuilder MIB1 =
4532          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4533              .addReg(ZeroReg)
4534              .addImm(Encoding);
4535      InsInstrs.push_back(MIB1);
4536      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4537      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4538    }
4539    break;
4540  }
4541
4542  case MachineCombinerPattern::MULADDv8i8_OP1:
4543    Opc = AArch64::MLAv8i8;
4544    RC = &AArch64::FPR64RegClass;
4545    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4546    break;
4547  case MachineCombinerPattern::MULADDv8i8_OP2:
4548    Opc = AArch64::MLAv8i8;
4549    RC = &AArch64::FPR64RegClass;
4550    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4551    break;
4552  case MachineCombinerPattern::MULADDv16i8_OP1:
4553    Opc = AArch64::MLAv16i8;
4554    RC = &AArch64::FPR128RegClass;
4555    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4556    break;
4557  case MachineCombinerPattern::MULADDv16i8_OP2:
4558    Opc = AArch64::MLAv16i8;
4559    RC = &AArch64::FPR128RegClass;
4560    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4561    break;
4562  case MachineCombinerPattern::MULADDv4i16_OP1:
4563    Opc = AArch64::MLAv4i16;
4564    RC = &AArch64::FPR64RegClass;
4565    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4566    break;
4567  case MachineCombinerPattern::MULADDv4i16_OP2:
4568    Opc = AArch64::MLAv4i16;
4569    RC = &AArch64::FPR64RegClass;
4570    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4571    break;
4572  case MachineCombinerPattern::MULADDv8i16_OP1:
4573    Opc = AArch64::MLAv8i16;
4574    RC = &AArch64::FPR128RegClass;
4575    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4576    break;
4577  case MachineCombinerPattern::MULADDv8i16_OP2:
4578    Opc = AArch64::MLAv8i16;
4579    RC = &AArch64::FPR128RegClass;
4580    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4581    break;
4582  case MachineCombinerPattern::MULADDv2i32_OP1:
4583    Opc = AArch64::MLAv2i32;
4584    RC = &AArch64::FPR64RegClass;
4585    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4586    break;
4587  case MachineCombinerPattern::MULADDv2i32_OP2:
4588    Opc = AArch64::MLAv2i32;
4589    RC = &AArch64::FPR64RegClass;
4590    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4591    break;
4592  case MachineCombinerPattern::MULADDv4i32_OP1:
4593    Opc = AArch64::MLAv4i32;
4594    RC = &AArch64::FPR128RegClass;
4595    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4596    break;
4597  case MachineCombinerPattern::MULADDv4i32_OP2:
4598    Opc = AArch64::MLAv4i32;
4599    RC = &AArch64::FPR128RegClass;
4600    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4601    break;
4602
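      // For the vector MULSUB*_OP1 patterns (R = A*B - C) there is no single
      // accumulating instruction, so the subtrahend is negated first and an
      // MLA is used:
      //   NEG V, C;  MLA V, A, B   // V = -C + A*B
      // The *_OP2 patterns (R = C - A*B) map directly onto MLS.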
4603  case MachineCombinerPattern::MULSUBv8i8_OP1:
4604    Opc = AArch64::MLAv8i8;
4605    RC = &AArch64::FPR64RegClass;
4606    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4607                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
4608                                 RC);
4609    break;
4610  case MachineCombinerPattern::MULSUBv8i8_OP2:
4611    Opc = AArch64::MLSv8i8;
4612    RC = &AArch64::FPR64RegClass;
4613    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4614    break;
4615  case MachineCombinerPattern::MULSUBv16i8_OP1:
4616    Opc = AArch64::MLAv16i8;
4617    RC = &AArch64::FPR128RegClass;
4618    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4619                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
4620                                 RC);
4621    break;
4622  case MachineCombinerPattern::MULSUBv16i8_OP2:
4623    Opc = AArch64::MLSv16i8;
4624    RC = &AArch64::FPR128RegClass;
4625    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4626    break;
4627  case MachineCombinerPattern::MULSUBv4i16_OP1:
4628    Opc = AArch64::MLAv4i16;
4629    RC = &AArch64::FPR64RegClass;
4630    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4631                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4632                                 RC);
4633    break;
4634  case MachineCombinerPattern::MULSUBv4i16_OP2:
4635    Opc = AArch64::MLSv4i16;
4636    RC = &AArch64::FPR64RegClass;
4637    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4638    break;
4639  case MachineCombinerPattern::MULSUBv8i16_OP1:
4640    Opc = AArch64::MLAv8i16;
4641    RC = &AArch64::FPR128RegClass;
4642    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4643                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4644                                 RC);
4645    break;
4646  case MachineCombinerPattern::MULSUBv8i16_OP2:
4647    Opc = AArch64::MLSv8i16;
4648    RC = &AArch64::FPR128RegClass;
4649    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4650    break;
4651  case MachineCombinerPattern::MULSUBv2i32_OP1:
4652    Opc = AArch64::MLAv2i32;
4653    RC = &AArch64::FPR64RegClass;
4654    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4655                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4656                                 RC);
4657    break;
4658  case MachineCombinerPattern::MULSUBv2i32_OP2:
4659    Opc = AArch64::MLSv2i32;
4660    RC = &AArch64::FPR64RegClass;
4661    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4662    break;
4663  case MachineCombinerPattern::MULSUBv4i32_OP1:
4664    Opc = AArch64::MLAv4i32;
4665    RC = &AArch64::FPR128RegClass;
4666    MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4667                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4668                                 RC);
4669    break;
4670  case MachineCombinerPattern::MULSUBv4i32_OP2:
4671    Opc = AArch64::MLSv4i32;
4672    RC = &AArch64::FPR128RegClass;
4673    MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4674    break;
4675
4676  case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4677    Opc = AArch64::MLAv4i16_indexed;
4678    RC = &AArch64::FPR64RegClass;
4679    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4680    break;
4681  case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4682    Opc = AArch64::MLAv4i16_indexed;
4683    RC = &AArch64::FPR64RegClass;
4684    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4685    break;
4686  case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4687    Opc = AArch64::MLAv8i16_indexed;
4688    RC = &AArch64::FPR128RegClass;
4689    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4690    break;
4691  case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4692    Opc = AArch64::MLAv8i16_indexed;
4693    RC = &AArch64::FPR128RegClass;
4694    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4695    break;
4696  case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4697    Opc = AArch64::MLAv2i32_indexed;
4698    RC = &AArch64::FPR64RegClass;
4699    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4700    break;
4701  case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4702    Opc = AArch64::MLAv2i32_indexed;
4703    RC = &AArch64::FPR64RegClass;
4704    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4705    break;
4706  case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4707    Opc = AArch64::MLAv4i32_indexed;
4708    RC = &AArch64::FPR128RegClass;
4709    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4710    break;
4711  case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4712    Opc = AArch64::MLAv4i32_indexed;
4713    RC = &AArch64::FPR128RegClass;
4714    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4715    break;
4716
4717  case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4718    Opc = AArch64::MLAv4i16_indexed;
4719    RC = &AArch64::FPR64RegClass;
4720    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4721                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4722                                 RC);
4723    break;
4724  case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4725    Opc = AArch64::MLSv4i16_indexed;
4726    RC = &AArch64::FPR64RegClass;
4727    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4728    break;
4729  case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4730    Opc = AArch64::MLAv8i16_indexed;
4731    RC = &AArch64::FPR128RegClass;
4732    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4733                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4734                                 RC);
4735    break;
4736  case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4737    Opc = AArch64::MLSv8i16_indexed;
4738    RC = &AArch64::FPR128RegClass;
4739    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4740    break;
4741  case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4742    Opc = AArch64::MLAv2i32_indexed;
4743    RC = &AArch64::FPR64RegClass;
4744    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4745                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4746                                 RC);
4747    break;
4748  case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4749    Opc = AArch64::MLSv2i32_indexed;
4750    RC = &AArch64::FPR64RegClass;
4751    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4752    break;
4753  case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4754    Opc = AArch64::MLAv4i32_indexed;
4755    RC = &AArch64::FPR128RegClass;
4756    MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4757                                 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4758                                 RC);
4759    break;
4760  case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4761    Opc = AArch64::MLSv4i32_indexed;
4762    RC = &AArch64::FPR128RegClass;
4763    MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4764    break;
4765
4766  // Floating Point Support
4767  case MachineCombinerPattern::FMULADDH_OP1:
4768    Opc = AArch64::FMADDHrrr;
4769    RC = &AArch64::FPR16RegClass;
4770    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4771    break;
4772  case MachineCombinerPattern::FMULADDS_OP1:
4773    Opc = AArch64::FMADDSrrr;
4774    RC = &AArch64::FPR32RegClass;
4775    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4776    break;
4777  case MachineCombinerPattern::FMULADDD_OP1:
4778    Opc = AArch64::FMADDDrrr;
4779    RC = &AArch64::FPR64RegClass;
4780    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4781    break;
4782
4783  case MachineCombinerPattern::FMULADDH_OP2:
4784    Opc = AArch64::FMADDHrrr;
4785    RC = &AArch64::FPR16RegClass;
4786    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4787    break;
4788  case MachineCombinerPattern::FMULADDS_OP2:
4789    Opc = AArch64::FMADDSrrr;
4790    RC = &AArch64::FPR32RegClass;
4791    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4792    break;
4793  case MachineCombinerPattern::FMULADDD_OP2:
4794    Opc = AArch64::FMADDDrrr;
4795    RC = &AArch64::FPR64RegClass;
4796    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4797    break;
4798
4799  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4800    Opc = AArch64::FMLAv1i32_indexed;
4801    RC = &AArch64::FPR32RegClass;
4802    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4803                           FMAInstKind::Indexed);
4804    break;
4805  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4806    Opc = AArch64::FMLAv1i32_indexed;
4807    RC = &AArch64::FPR32RegClass;
4808    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4809                           FMAInstKind::Indexed);
4810    break;
4811
4812  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4813    Opc = AArch64::FMLAv1i64_indexed;
4814    RC = &AArch64::FPR64RegClass;
4815    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4816                           FMAInstKind::Indexed);
4817    break;
4818  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4819    Opc = AArch64::FMLAv1i64_indexed;
4820    RC = &AArch64::FPR64RegClass;
4821    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4822                           FMAInstKind::Indexed);
4823    break;
4824
4825  case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4826    RC = &AArch64::FPR64RegClass;
4827    Opc = AArch64::FMLAv4i16_indexed;
4828    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4829                           FMAInstKind::Indexed);
4830    break;
4831  case MachineCombinerPattern::FMLAv4f16_OP1:
4832    RC = &AArch64::FPR64RegClass;
4833    Opc = AArch64::FMLAv4f16;
4834    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4835                           FMAInstKind::Accumulator);
4836    break;
4837  case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4838    RC = &AArch64::FPR64RegClass;
4839    Opc = AArch64::FMLAv4i16_indexed;
4840    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4841                           FMAInstKind::Indexed);
4842    break;
4843  case MachineCombinerPattern::FMLAv4f16_OP2:
4844    RC = &AArch64::FPR64RegClass;
4845    Opc = AArch64::FMLAv4f16;
4846    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4847                           FMAInstKind::Accumulator);
4848    break;
4849
4850  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4851  case MachineCombinerPattern::FMLAv2f32_OP1:
4852    RC = &AArch64::FPR64RegClass;
4853    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4854      Opc = AArch64::FMLAv2i32_indexed;
4855      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4856                             FMAInstKind::Indexed);
4857    } else {
4858      Opc = AArch64::FMLAv2f32;
4859      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4860                             FMAInstKind::Accumulator);
4861    }
4862    break;
4863  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4864  case MachineCombinerPattern::FMLAv2f32_OP2:
4865    RC = &AArch64::FPR64RegClass;
4866    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4867      Opc = AArch64::FMLAv2i32_indexed;
4868      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4869                             FMAInstKind::Indexed);
4870    } else {
4871      Opc = AArch64::FMLAv2f32;
4872      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4873                             FMAInstKind::Accumulator);
4874    }
4875    break;
4876
4877  case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4878    RC = &AArch64::FPR128RegClass;
4879    Opc = AArch64::FMLAv8i16_indexed;
4880    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4881                           FMAInstKind::Indexed);
4882    break;
4883  case MachineCombinerPattern::FMLAv8f16_OP1:
4884    RC = &AArch64::FPR128RegClass;
4885    Opc = AArch64::FMLAv8f16;
4886    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4887                           FMAInstKind::Accumulator);
4888    break;
4889  case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4890    RC = &AArch64::FPR128RegClass;
4891    Opc = AArch64::FMLAv8i16_indexed;
4892    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4893                           FMAInstKind::Indexed);
4894    break;
4895  case MachineCombinerPattern::FMLAv8f16_OP2:
4896    RC = &AArch64::FPR128RegClass;
4897    Opc = AArch64::FMLAv8f16;
4898    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4899                           FMAInstKind::Accumulator);
4900    break;
4901
4902  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4903  case MachineCombinerPattern::FMLAv2f64_OP1:
4904    RC = &AArch64::FPR128RegClass;
4905    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4906      Opc = AArch64::FMLAv2i64_indexed;
4907      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4908                             FMAInstKind::Indexed);
4909    } else {
4910      Opc = AArch64::FMLAv2f64;
4911      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4912                             FMAInstKind::Accumulator);
4913    }
4914    break;
4915  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4916  case MachineCombinerPattern::FMLAv2f64_OP2:
4917    RC = &AArch64::FPR128RegClass;
4918    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4919      Opc = AArch64::FMLAv2i64_indexed;
4920      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4921                             FMAInstKind::Indexed);
4922    } else {
4923      Opc = AArch64::FMLAv2f64;
4924      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4925                             FMAInstKind::Accumulator);
4926    }
4927    break;
4928
4929  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4930  case MachineCombinerPattern::FMLAv4f32_OP1:
4931    RC = &AArch64::FPR128RegClass;
4932    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4933      Opc = AArch64::FMLAv4i32_indexed;
4934      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4935                             FMAInstKind::Indexed);
4936    } else {
4937      Opc = AArch64::FMLAv4f32;
4938      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4939                             FMAInstKind::Accumulator);
4940    }
4941    break;
4942
4943  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4944  case MachineCombinerPattern::FMLAv4f32_OP2:
4945    RC = &AArch64::FPR128RegClass;
4946    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4947      Opc = AArch64::FMLAv4i32_indexed;
4948      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4949                             FMAInstKind::Indexed);
4950    } else {
4951      Opc = AArch64::FMLAv4f32;
4952      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4953                             FMAInstKind::Accumulator);
4954    }
4955    break;
4956
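      // Scalar FP multiply-subtract mappings (per the semantics of the fused
      // AArch64 ops):
      //   FSUB R, (FMUL A,B), C   ==> FNMSUB R,A,B,C  // R = A*B - C
      //   FSUB R, (FNMUL A,B), C  ==> FNMADD R,A,B,C  // R = -(A*B) - C
      //   FSUB R, C, (FMUL A,B)   ==> FMSUB  R,A,B,C  // R = C - A*B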
4957  case MachineCombinerPattern::FMULSUBH_OP1:
4958    Opc = AArch64::FNMSUBHrrr;
4959    RC = &AArch64::FPR16RegClass;
4960    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4961    break;
4962  case MachineCombinerPattern::FMULSUBS_OP1:
4963    Opc = AArch64::FNMSUBSrrr;
4964    RC = &AArch64::FPR32RegClass;
4965    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4966    break;
4967  case MachineCombinerPattern::FMULSUBD_OP1:
4968    Opc = AArch64::FNMSUBDrrr;
4969    RC = &AArch64::FPR64RegClass;
4970    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4971    break;
4972
4973  case MachineCombinerPattern::FNMULSUBH_OP1:
4974    Opc = AArch64::FNMADDHrrr;
4975    RC = &AArch64::FPR16RegClass;
4976    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4977    break;
4978  case MachineCombinerPattern::FNMULSUBS_OP1:
4979    Opc = AArch64::FNMADDSrrr;
4980    RC = &AArch64::FPR32RegClass;
4981    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4982    break;
4983  case MachineCombinerPattern::FNMULSUBD_OP1:
4984    Opc = AArch64::FNMADDDrrr;
4985    RC = &AArch64::FPR64RegClass;
4986    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4987    break;
4988
4989  case MachineCombinerPattern::FMULSUBH_OP2:
4990    Opc = AArch64::FMSUBHrrr;
4991    RC = &AArch64::FPR16RegClass;
4992    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4993    break;
4994  case MachineCombinerPattern::FMULSUBS_OP2:
4995    Opc = AArch64::FMSUBSrrr;
4996    RC = &AArch64::FPR32RegClass;
4997    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4998    break;
4999  case MachineCombinerPattern::FMULSUBD_OP2:
5000    Opc = AArch64::FMSUBDrrr;
5001    RC = &AArch64::FPR64RegClass;
5002    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5003    break;
5004
5005  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5006    Opc = AArch64::FMLSv1i32_indexed;
5007    RC = &AArch64::FPR32RegClass;
5008    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5009                           FMAInstKind::Indexed);
5010    break;
5011
5012  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5013    Opc = AArch64::FMLSv1i64_indexed;
5014    RC = &AArch64::FPR64RegClass;
5015    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5016                           FMAInstKind::Indexed);
5017    break;
5018
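      // For the vector FMLS*_OP1 patterns (R = A*B - C) there is no fused
      // form taking a negated accumulator, so -C is materialized with an
      // FNEG and an FMLA is used:
      //   FNEG V, C;  FMLA V, A, B   // V = -C + A*B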
5019  case MachineCombinerPattern::FMLSv4f16_OP1:
5020  case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
5021    RC = &AArch64::FPR64RegClass;
5022    Register NewVR = MRI.createVirtualRegister(RC);
5023    MachineInstrBuilder MIB1 =
5024        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
5025            .add(Root.getOperand(2));
5026    InsInstrs.push_back(MIB1);
5027    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5028    if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
5029      Opc = AArch64::FMLAv4f16;
5030      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5031                             FMAInstKind::Accumulator, &NewVR);
5032    } else {
5033      Opc = AArch64::FMLAv4i16_indexed;
5034      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5035                             FMAInstKind::Indexed, &NewVR);
5036    }
5037    break;
5038  }
5039  case MachineCombinerPattern::FMLSv4f16_OP2:
5040    RC = &AArch64::FPR64RegClass;
5041    Opc = AArch64::FMLSv4f16;
5042    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5043                           FMAInstKind::Accumulator);
5044    break;
5045  case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5046    RC = &AArch64::FPR64RegClass;
5047    Opc = AArch64::FMLSv4i16_indexed;
5048    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5049                           FMAInstKind::Indexed);
5050    break;
5051
5052  case MachineCombinerPattern::FMLSv2f32_OP2:
5053  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5054    RC = &AArch64::FPR64RegClass;
5055    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
5056      Opc = AArch64::FMLSv2i32_indexed;
5057      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5058                             FMAInstKind::Indexed);
5059    } else {
5060      Opc = AArch64::FMLSv2f32;
5061      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5062                             FMAInstKind::Accumulator);
5063    }
5064    break;
5065
5066  case MachineCombinerPattern::FMLSv8f16_OP1:
5067  case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
5068    RC = &AArch64::FPR128RegClass;
5069    Register NewVR = MRI.createVirtualRegister(RC);
5070    MachineInstrBuilder MIB1 =
5071        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
5072            .add(Root.getOperand(2));
5073    InsInstrs.push_back(MIB1);
5074    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5075    if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
5076      Opc = AArch64::FMLAv8f16;
5077      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5078                             FMAInstKind::Accumulator, &NewVR);
5079    } else {
5080      Opc = AArch64::FMLAv8i16_indexed;
5081      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5082                             FMAInstKind::Indexed, &NewVR);
5083    }
5084    break;
5085  }
5086  case MachineCombinerPattern::FMLSv8f16_OP2:
5087    RC = &AArch64::FPR128RegClass;
5088    Opc = AArch64::FMLSv8f16;
5089    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5090                           FMAInstKind::Accumulator);
5091    break;
5092  case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5093    RC = &AArch64::FPR128RegClass;
5094    Opc = AArch64::FMLSv8i16_indexed;
5095    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5096                           FMAInstKind::Indexed);
5097    break;
5098
5099  case MachineCombinerPattern::FMLSv2f64_OP2:
5100  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5101    RC = &AArch64::FPR128RegClass;
5102    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
5103      Opc = AArch64::FMLSv2i64_indexed;
5104      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5105                             FMAInstKind::Indexed);
5106    } else {
5107      Opc = AArch64::FMLSv2f64;
5108      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5109                             FMAInstKind::Accumulator);
5110    }
5111    break;
5112
5113  case MachineCombinerPattern::FMLSv4f32_OP2:
5114  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5115    RC = &AArch64::FPR128RegClass;
5116    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
5117      Opc = AArch64::FMLSv4i32_indexed;
5118      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5119                             FMAInstKind::Indexed);
5120    } else {
5121      Opc = AArch64::FMLSv4f32;
5122      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5123                             FMAInstKind::Accumulator);
5124    }
5125    break;
5126  case MachineCombinerPattern::FMLSv2f32_OP1:
5127  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
5128    RC = &AArch64::FPR64RegClass;
5129    Register NewVR = MRI.createVirtualRegister(RC);
5130    MachineInstrBuilder MIB1 =
5131        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
5132            .add(Root.getOperand(2));
5133    InsInstrs.push_back(MIB1);
5134    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5135    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
5136      Opc = AArch64::FMLAv2i32_indexed;
5137      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5138                             FMAInstKind::Indexed, &NewVR);
5139    } else {
5140      Opc = AArch64::FMLAv2f32;
5141      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5142                             FMAInstKind::Accumulator, &NewVR);
5143    }
5144    break;
5145  }
5146  case MachineCombinerPattern::FMLSv4f32_OP1:
5147  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
5148    RC = &AArch64::FPR128RegClass;
5149    Register NewVR = MRI.createVirtualRegister(RC);
5150    MachineInstrBuilder MIB1 =
5151        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
5152            .add(Root.getOperand(2));
5153    InsInstrs.push_back(MIB1);
5154    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5155    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
5156      Opc = AArch64::FMLAv4i32_indexed;
5157      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5158                             FMAInstKind::Indexed, &NewVR);
5159    } else {
5160      Opc = AArch64::FMLAv4f32;
5161      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5162                             FMAInstKind::Accumulator, &NewVR);
5163    }
5164    break;
5165  }
5166  case MachineCombinerPattern::FMLSv2f64_OP1:
5167  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
5168    RC = &AArch64::FPR128RegClass;
5169    Register NewVR = MRI.createVirtualRegister(RC);
5170    MachineInstrBuilder MIB1 =
5171        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
5172            .add(Root.getOperand(2));
5173    InsInstrs.push_back(MIB1);
5174    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5175    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
5176      Opc = AArch64::FMLAv2i64_indexed;
5177      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5178                             FMAInstKind::Indexed, &NewVR);
5179    } else {
5180      Opc = AArch64::FMLAv2f64;
5181      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5182                             FMAInstKind::Accumulator, &NewVR);
5183    }
5184    break;
5185  }
5186  } // end switch (Pattern)
5187  // Record MUL and ADD/SUB for deletion
5188  DelInstrs.push_back(MUL);
5189  DelInstrs.push_back(&Root);
5190}
5191
5192/// Replace a csinc-branch sequence by a simple conditional branch
5193///
5194/// Examples:
5195/// 1. \code
5196///   csinc  w9, wzr, wzr, <condition code>
5197///   tbnz   w9, #0, 0x44
5198///    \endcode
5199/// to
5200///    \code
5201///   b.<inverted condition code>
5202///    \endcode
5203///
5204/// 2. \code
5205///   csinc w9, wzr, wzr, <condition code>
5206///   tbz   w9, #0, 0x44
5207///    \endcode
5208/// to
5209///    \code
5210///   b.<condition code>
5211///    \endcode
5212///
5213/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
5214/// compare's constant operand is a power of 2.
5215///
5216/// Examples:
5217///    \code
5218///   and  w8, w8, #0x400
5219///   cbnz w8, L1
5220///    \endcode
5221/// to
5222///    \code
5223///   tbnz w8, #10, L1
5224///    \endcode
5225///
5226/// \param  MI Conditional Branch
5227/// \return True when the simple conditional branch is generated
5228///
5229bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
5230  bool IsNegativeBranch = false;
5231  bool IsTestAndBranch = false;
5232  unsigned TargetBBInMI = 0;
5233  switch (MI.getOpcode()) {
5234  default:
5235    llvm_unreachable("Unknown branch instruction?");
5236  case AArch64::Bcc:
5237    return false;
5238  case AArch64::CBZW:
5239  case AArch64::CBZX:
5240    TargetBBInMI = 1;
5241    break;
5242  case AArch64::CBNZW:
5243  case AArch64::CBNZX:
5244    TargetBBInMI = 1;
5245    IsNegativeBranch = true;
5246    break;
5247  case AArch64::TBZW:
5248  case AArch64::TBZX:
5249    TargetBBInMI = 2;
5250    IsTestAndBranch = true;
5251    break;
5252  case AArch64::TBNZW:
5253  case AArch64::TBNZX:
5254    TargetBBInMI = 2;
5255    IsNegativeBranch = true;
5256    IsTestAndBranch = true;
5257    break;
5258  }
5259  // So we increment a zero register and test for bits other
5260  // than bit 0? Conservatively bail out in case the verifier
5261  // missed this case.
5262  if (IsTestAndBranch && MI.getOperand(1).getImm())
5263    return false;
5264
5265  // Find Definition.
5266  assert(MI.getParent() && "Incomplete machine instruction\n");
5267  MachineBasicBlock *MBB = MI.getParent();
5268  MachineFunction *MF = MBB->getParent();
5269  MachineRegisterInfo *MRI = &MF->getRegInfo();
5270  Register VReg = MI.getOperand(0).getReg();
5271  if (!Register::isVirtualRegister(VReg))
5272    return false;
5273
5274  MachineInstr *DefMI = MRI->getVRegDef(VReg);
5275
5276  // Look through COPY instructions to find definition.
5277  while (DefMI->isCopy()) {
5278    Register CopyVReg = DefMI->getOperand(1).getReg();
5279    if (!MRI->hasOneNonDBGUse(CopyVReg))
5280      return false;
5281    if (!MRI->hasOneDef(CopyVReg))
5282      return false;
5283    DefMI = MRI->getVRegDef(CopyVReg);
5284  }
5285
5286  switch (DefMI->getOpcode()) {
5287  default:
5288    return false;
5289  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
5290  case AArch64::ANDWri:
5291  case AArch64::ANDXri: {
5292    if (IsTestAndBranch)
5293      return false;
5294    if (DefMI->getParent() != MBB)
5295      return false;
5296    if (!MRI->hasOneNonDBGUse(VReg))
5297      return false;
5298
5299    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
5300    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
5301        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
5302    if (!isPowerOf2_64(Mask))
5303      return false;
5304
5305    MachineOperand &MO = DefMI->getOperand(1);
5306    Register NewReg = MO.getReg();
5307    if (!Register::isVirtualRegister(NewReg))
5308      return false;
5309
5310    assert(!MRI->def_empty(NewReg) && "Register must be defined.");
5311
5312    MachineBasicBlock &RefToMBB = *MBB;
5313    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
5314    DebugLoc DL = MI.getDebugLoc();
5315    unsigned Imm = Log2_64(Mask);
5316    unsigned Opc = (Imm < 32)
5317                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
5318                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
5319    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
5320                              .addReg(NewReg)
5321                              .addImm(Imm)
5322                              .addMBB(TBB);
5323    // Register lives on to the TBZ/TBNZ now.
5324    MO.setIsKill(false);
5325
5326    // For immediates smaller than 32, we need to use the 32-bit
5327    // variant (W) in all cases, since the 64-bit variant cannot
5328    // encode them.
5329    // Therefore, if the input register is 64-bit, we need to take its
5330    // 32-bit sub-register.
5331    if (!Is32Bit && Imm < 32)
5332      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
5333    MI.eraseFromParent();
5334    return true;
5335  }
5336  // Look for CSINC
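      // (A "cset Rd, <cc>" is an alias for "csinc Rd, zr, zr, invert(<cc>)",
      // so a branch on the result is really a test of the condition code.)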
5337  case AArch64::CSINCWr:
5338  case AArch64::CSINCXr: {
5339    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
5340          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
5341        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
5342          DefMI->getOperand(2).getReg() == AArch64::XZR))
5343      return false;
5344
5345    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
5346      return false;
5347
5348    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
5349    // Convert only when the condition code is not modified between
5350    // the CSINC and the branch. The CC may be used by other
5351    // instructions in between.
5352    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
5353      return false;
5354    MachineBasicBlock &RefToMBB = *MBB;
5355    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
5356    DebugLoc DL = MI.getDebugLoc();
5357    if (IsNegativeBranch)
5358      CC = AArch64CC::getInvertedCondCode(CC);
5359    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
5360    MI.eraseFromParent();
5361    return true;
5362  }
5363  }
5364}
5365
5366std::pair<unsigned, unsigned>
5367AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5368  const unsigned Mask = AArch64II::MO_FRAGMENT;
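      // For example, a flag word of (MO_PAGE | MO_NC) decomposes into the
      // direct flag MO_PAGE and the bitmask flag MO_NC.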
5369  return std::make_pair(TF & Mask, TF & ~Mask);
5370}
5371
5372ArrayRef<std::pair<unsigned, const char *>>
5373AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5374  using namespace AArch64II;
5375
5376  static const std::pair<unsigned, const char *> TargetFlags[] = {
5377      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
5378      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
5379      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
5380      {MO_HI12, "aarch64-hi12"}};
5381  return makeArrayRef(TargetFlags);
5382}
5383
5384ArrayRef<std::pair<unsigned, const char *>>
5385AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5386  using namespace AArch64II;
5387
5388  static const std::pair<unsigned, const char *> TargetFlags[] = {
5389      {MO_COFFSTUB, "aarch64-coffstub"},
5390      {MO_GOT, "aarch64-got"},
5391      {MO_NC, "aarch64-nc"},
5392      {MO_S, "aarch64-s"},
5393      {MO_TLS, "aarch64-tls"},
5394      {MO_DLLIMPORT, "aarch64-dllimport"},
5395      {MO_PREL, "aarch64-prel"},
5396      {MO_TAGGED, "aarch64-tagged"}};
5397  return makeArrayRef(TargetFlags);
5398}
5399
5400ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5401AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5402  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5403      {{MOSuppressPair, "aarch64-suppress-pair"},
5404       {MOStridedAccess, "aarch64-strided-access"}};
5405  return makeArrayRef(TargetFlags);
5406}
5407
5408/// Constants defining how certain sequences should be outlined.
5409/// This encompasses how an outlined function should be called, and what kind of
5410/// frame should be emitted for that outlined function.
5411///
5412/// \p MachineOutlinerDefault implies that the function should be called with
5413/// a save and restore of LR to the stack.
5414///
5415/// That is,
5416///
5417/// I1     Save LR                    OUTLINED_FUNCTION:
5418/// I2 --> BL OUTLINED_FUNCTION       I1
5419/// I3     Restore LR                 I2
5420///                                   I3
5421///                                   RET
5422///
5423/// * Call construction overhead: 3 (save + BL + restore)
5424/// * Frame construction overhead: 1 (ret)
5425/// * Requires stack fixups? Yes
5426///
5427/// \p MachineOutlinerTailCall implies that the function is being created from
5428/// a sequence of instructions ending in a return.
5429///
5430/// That is,
5431///
5432/// I1                             OUTLINED_FUNCTION:
5433/// I2 --> B OUTLINED_FUNCTION     I1
5434/// RET                            I2
5435///                                RET
5436///
5437/// * Call construction overhead: 1 (B)
5438/// * Frame construction overhead: 0 (Return included in sequence)
5439/// * Requires stack fixups? No
5440///
5441/// \p MachineOutlinerNoLRSave implies that the function should be called using
5442/// a BL instruction, but doesn't require LR to be saved and restored. This
5443/// happens when LR is known to be dead.
5444///
5445/// That is,
5446///
5447/// I1                                OUTLINED_FUNCTION:
5448/// I2 --> BL OUTLINED_FUNCTION       I1
5449/// I3                                I2
5450///                                   I3
5451///                                   RET
5452///
5453/// * Call construction overhead: 1 (BL)
5454/// * Frame construction overhead: 1 (RET)
5455/// * Requires stack fixups? No
5456///
5457/// \p MachineOutlinerThunk implies that the function is being created from
5458/// a sequence of instructions ending in a call. The outlined function is
5459/// called with a BL instruction, and the outlined function tail-calls the
5460/// original call destination.
5461///
5462/// That is,
5463///
5464/// I1                                OUTLINED_FUNCTION:
5465/// I2 --> BL OUTLINED_FUNCTION       I1
5466/// BL f                              I2
5467///                                   B f
5468/// * Call construction overhead: 1 (BL)
5469/// * Frame construction overhead: 0
5470/// * Requires stack fixups? No
5471///
5472/// \p MachineOutlinerRegSave implies that the function should be called with a
5473/// save and restore of LR to an available register. This allows us to avoid
5474/// stack fixups. Note that this outlining variant is compatible with the
5475/// NoLRSave case.
5476///
5477/// That is,
5478///
5479/// I1     Save LR                    OUTLINED_FUNCTION:
5480/// I2 --> BL OUTLINED_FUNCTION       I1
5481/// I3     Restore LR                 I2
5482///                                   I3
5483///                                   RET
5484///
5485/// * Call construction overhead: 3 (save + BL + restore)
5486/// * Frame construction overhead: 1 (ret)
5487/// * Requires stack fixups? No
5488enum MachineOutlinerClass {
5489  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
5490  MachineOutlinerTailCall, /// Only emit a branch.
5491  MachineOutlinerNoLRSave, /// Emit a call and return.
5492  MachineOutlinerThunk,    /// Emit a call and tail-call.
5493  MachineOutlinerRegSave   /// Same as default, but save to a register.
5494};
5495
5496enum MachineOutlinerMBBFlags {
5497  LRUnavailableSomewhere = 0x2,
5498  HasCalls = 0x4,
5499  UnsafeRegsDead = 0x8
5500};
5501
5502unsigned
5503AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5504  assert(C.LRUWasSet && "LRU wasn't set?");
5505  MachineFunction *MF = C.getMF();
5506  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5507      MF->getSubtarget().getRegisterInfo());
5508
5509  // Check if there is an available register across the sequence that we can
5510  // use.
5511  for (unsigned Reg : AArch64::GPR64RegClass) {
5512    if (!ARI->isReservedReg(*MF, Reg) &&
5513        Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
5514        Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5515        Reg != AArch64::X17 && // Ditto for X17.
5516        C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5517      return Reg;
5518  }
5519
5520  // No suitable register. Return 0.
5521  return 0u;
5522}
5523
5524static bool
5525outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
5526                                         const outliner::Candidate &b) {
5527  const Function &Fa = a.getMF()->getFunction();
5528  const Function &Fb = b.getMF()->getFunction();
5529
5530  // If neither function has the "sign-return-address" attribute, their
5531  // signing behaviour is equal.
5532  if (!Fa.hasFnAttribute("sign-return-address") &&
5533      !Fb.hasFnAttribute("sign-return-address")) {
5534    return true;
5535  }
5536
5537  // If both functions have the "sign-return-address" attribute, their signing
5538  // behaviour is equal if the values of the attributes are equal.
5539  if (Fa.hasFnAttribute("sign-return-address") &&
5540      Fb.hasFnAttribute("sign-return-address")) {
5541    StringRef ScopeA =
5542        Fa.getFnAttribute("sign-return-address").getValueAsString();
5543    StringRef ScopeB =
5544        Fb.getFnAttribute("sign-return-address").getValueAsString();
5545    return ScopeA.equals(ScopeB);
5546  }
5547
5548  // If function B doesn't have the "sign-return-address" attribute but A does,
5549  // the functions' signing behaviour is equal if A's value for
5550  // "sign-return-address" is "none" and vice versa.
5551  if (Fa.hasFnAttribute("sign-return-address")) {
5552    StringRef ScopeA =
5553        Fa.getFnAttribute("sign-return-address").getValueAsString();
5554    return ScopeA.equals("none");
5555  }
5556
5557  if (Fb.hasFnAttribute("sign-return-address")) {
5558    StringRef ScopeB =
5559        Fb.getFnAttribute("sign-return-address").getValueAsString();
5560    return ScopeB.equals("none");
5561  }
5562
5563  llvm_unreachable("Unknown combination of sign-return-address attributes");
5564}
5565
5566static bool
5567outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
5568                                       const outliner::Candidate &b) {
5569  const Function &Fa = a.getMF()->getFunction();
5570  const Function &Fb = b.getMF()->getFunction();
5571
5572  // If neither function has the "sign-return-address-key" attribute,
5573  // their keys are equal.
5574  if (!Fa.hasFnAttribute("sign-return-address-key") &&
5575      !Fb.hasFnAttribute("sign-return-address-key")) {
5576    return true;
5577  }
5578
5579  // If both functions have the "sign-return-address-key" attribute, their
5580  // keys are equal if the values of "sign-return-address-key" are equal.
5581  if (Fa.hasFnAttribute("sign-return-address-key") &&
5582      Fb.hasFnAttribute("sign-return-address-key")) {
5583    StringRef KeyA =
5584        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5585    StringRef KeyB =
5586        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5587    return KeyA.equals(KeyB);
5588  }
5589
5590  // If B doesn't have the "sign-return-address-key" attribute, the keys are
5591  // equal if function A has the default key (a_key).
5592  if (Fa.hasFnAttribute("sign-return-address-key")) {
5593    StringRef KeyA =
5594        Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5595    return KeyA.equals_lower("a_key");
5596  }
5597
5598  if (Fb.hasFnAttribute("sign-return-address-key")) {
5599    StringRef KeyB =
5600        Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5601    return KeyB.equals_lower("a_key");
5602  }
5603
5604  llvm_unreachable("Unknown combination of sign-return-address-key attributes");
5605}
5606
5607static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
5608                                                const outliner::Candidate &b) {
5609  const AArch64Subtarget &SubtargetA =
5610      a.getMF()->getSubtarget<AArch64Subtarget>();
5611  const AArch64Subtarget &SubtargetB =
5612      b.getMF()->getSubtarget<AArch64Subtarget>();
5613  return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
5614}
5615
5616outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
5617    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5618  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5619  unsigned SequenceSize =
5620      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
5621                      [this](unsigned Sum, const MachineInstr &MI) {
5622                        return Sum + getInstSizeInBytes(MI);
5623                      });
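      // SequenceSize is measured in bytes rather than instructions (via
      // getInstSizeInBytes) so that variable-size items such as inline asm
      // are costed correctly.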
5624  unsigned NumBytesToCreateFrame = 0;
5625
5626  // We only allow outlining for functions having exactly matching return
5627  // address signing attributes, i.e., all share the same value for the
5628  // attribute "sign-return-address" and all share the same type of key they
5629  // are signed with.
5630  // Additionally, we require all functions to either support v8.3a features
5631  // or not. Otherwise an outlined function could get signed using dedicated
5632  // v8.3 instructions, and a call from a function that doesn't support v8.3
5633  // instructions would therefore be invalid.
5634  if (std::adjacent_find(
5635          RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5636          [](const outliner::Candidate &a, const outliner::Candidate &b) {
5637            // Return true if a and b are non-equal w.r.t. return address
5638            // signing or support of v8.3a features
5639            if (outliningCandidatesSigningScopeConsensus(a, b) &&
5640                outliningCandidatesSigningKeyConsensus(a, b) &&
5641                outliningCandidatesV8_3OpsConsensus(a, b)) {
5642              return false;
5643            }
5644            return true;
5645          }) != RepeatedSequenceLocs.end()) {
5646    return outliner::OutlinedFunction();
5647  }
5648
5649  // Since at this point all candidates agree on their return address signing,
5650  // picking just one is fine. If the candidate functions potentially sign their
5651  // return addresses, the outlined function should do the same. Note that in
5652  // the case of "sign-return-address"="non-leaf" this is an assumption: it is
5653  // not certain that the outlined function will have to sign its return
5654  // address, but that is decided later, after the decision to outline
5655  // has already been made.
5656  // The same holds for the number of additional instructions we need: On
5657  // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
5658  // necessary. However, at this point we don't know if the outlined function
5659  // will have a RET instruction so we assume the worst.
5660  const Function &FCF = FirstCand.getMF()->getFunction();
5661  const TargetRegisterInfo &TRI = getRegisterInfo();
5662  if (FCF.hasFnAttribute("sign-return-address")) {
5663    // One PAC and one AUT instruction (4 bytes each).
5664    NumBytesToCreateFrame += 8;
5665
5666    // We have to check if sp-modifying instructions would get outlined.
5667    // If so, we only allow outlining if sp is unchanged overall; matching
5668    // sub and add instructions are okay to outline, but all other sp
5669    // modifications are not.
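        // (For example, a matched "sub sp, sp, #16 ... add sp, sp, #16" pair
        // nets to zero and may be outlined; an unmatched "sub sp, sp, #16"
        // may not.)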
5670    auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
5671      int SPValue = 0;
5672      MachineBasicBlock::iterator MBBI = C.front();
5673      for (;;) {
5674        if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
5675          switch (MBBI->getOpcode()) {
5676          case AArch64::ADDXri:
5677          case AArch64::ADDWri:
5678            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5679            assert(MBBI->getOperand(2).isImm() &&
5680                   "Expected operand to be immediate");
5681            assert(MBBI->getOperand(1).isReg() &&
5682                   "Expected operand to be a register");
5683            // Check if the add just increments sp. If so, we search for
5684            // matching sub instructions that decrement sp. If not, the
5685            // modification is illegal
5686            if (MBBI->getOperand(1).getReg() == AArch64::SP)
5687              SPValue += MBBI->getOperand(2).getImm();
5688            else
5689              return true;
5690            break;
5691          case AArch64::SUBXri:
5692          case AArch64::SUBWri:
5693            assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5694            assert(MBBI->getOperand(2).isImm() &&
5695                   "Expected operand to be immediate");
5696            assert(MBBI->getOperand(1).isReg() &&
5697                   "Expected operand to be a register");
5698            // Check if the sub just decrements sp. If so, we search for
5699            // matching add instructions that increment sp. If not, the
5700            // modification is illegal
5701            if (MBBI->getOperand(1).getReg() == AArch64::SP)
5702              SPValue -= MBBI->getOperand(2).getImm();
5703            else
5704              return true;
5705            break;
5706          default:
5707            return true;
5708          }
5709        }
5710        if (MBBI == C.back())
5711          break;
5712        ++MBBI;
5713      }
5714      if (SPValue)
5715        return true;
5716      return false;
5717    };
5718    // Remove candidates with illegal stack modifying instructions
5719    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5720                                              RepeatedSequenceLocs.end(),
5721                                              hasIllegalSPModification),
5722                               RepeatedSequenceLocs.end());
5723
5724    // If the sequence doesn't have enough candidates left, then we're done.
5725    if (RepeatedSequenceLocs.size() < 2)
5726      return outliner::OutlinedFunction();
5727  }
5728
5729  // Properties about candidate MBBs that hold for all of them.
5730  unsigned FlagsSetInAll = 0xF;
5731
5732  // Compute liveness information for each candidate, and set FlagsSetInAll.
5733  std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5734                [&FlagsSetInAll](outliner::Candidate &C) {
5735                  FlagsSetInAll &= C.Flags;
5736                });
5737
5738  // According to the AArch64 Procedure Call Standard, the following are
5739  // undefined on entry/exit from a function call:
5740  //
5741  // * Registers x16, x17 (and thus w16, w17)
5742  // * Condition codes (and thus the NZCV register)
5743  //
5744  // Because of this, we can't outline any sequence of instructions where
5745  // one of these registers is live into/across it. Thus, we need to delete
5746  // those candidates.
5749  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5750    // If the unsafe registers in this block are all dead, then we don't need
5751    // to compute liveness here.
5752    if (C.Flags & UnsafeRegsDead)
5753      return false;
5754    C.initLRU(TRI);
5755    LiveRegUnits LRU = C.LRU;
5756    return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5757            !LRU.available(AArch64::NZCV));
5758  };
5759
5760  // Are there any candidates where those registers are live?
5761  if (!(FlagsSetInAll & UnsafeRegsDead)) {
5762    // Erase every candidate that violates the restrictions above. (It could be
5763    // true that we have viable candidates, so it's not worth bailing out in
5764    // the case that, say, 1 out of 20 candidates violates the restrictions.)
5765    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5766                                              RepeatedSequenceLocs.end(),
5767                                              CantGuaranteeValueAcrossCall),
5768                               RepeatedSequenceLocs.end());
5769
5770    // If the sequence doesn't have enough candidates left, then we're done.
5771    if (RepeatedSequenceLocs.size() < 2)
5772      return outliner::OutlinedFunction();
5773  }
5774
5775  // At this point, we have only "safe" candidates to outline. Figure out
5776  // frame + call instruction information.
5777
5778  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5779
5780  // Helper lambda which sets call information for every candidate.
5781  auto SetCandidateCallInfo =
5782      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5783        for (outliner::Candidate &C : RepeatedSequenceLocs)
5784          C.setCallInfo(CallID, NumBytesForCall);
5785      };
5786
5787  unsigned FrameID = MachineOutlinerDefault;
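      // The default frame ends in a RET, which accounts for these 4 bytes.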
5788  NumBytesToCreateFrame += 4;
5789
5790  bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
5791    return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
5792  });
5793
5794  // Returns true if an instruction is safe to fix up, false otherwise.
5795  auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
5796    if (MI.isCall())
5797      return true;
5798
5799    if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
5800        !MI.readsRegister(AArch64::SP, &TRI))
5801      return true;
5802
5803    // Any modification of SP will break our code to save/restore LR.
5804    // FIXME: We could handle some instructions which add a constant
5805    // offset to SP, with a bit more work.
5806    if (MI.modifiesRegister(AArch64::SP, &TRI))
5807      return false;
5808
5809    // At this point, we have a stack instruction that we might need to
5810    // fix up. We'll handle it if it's a load or store.
5811    if (MI.mayLoadOrStore()) {
5812      const MachineOperand *Base; // Filled with the base operand of MI.
5813      int64_t Offset;             // Filled with the offset of MI.
5814
5815      // Does it allow us to offset the base operand and is the base the
5816      // register SP?
5817      if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
5818          Base->getReg() != AArch64::SP)
5819        return false;
5820
5821      // Find the minimum/maximum offset for this instruction and check
5822      // if fixing it up would be in range.
5823      int64_t MinOffset,
5824          MaxOffset;  // Unscaled offsets for the instruction.
5825      unsigned Scale; // The scale to multiply the offsets by.
5826      unsigned DummyWidth;
5827      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5828
5829      Offset += 16; // Update the offset to what it would be if we outlined.
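          // A sketch of the fixup: an "ldr x0, [sp, #8]" in the candidate must
          // become "ldr x0, [sp, #24]" in the outlined body, since saving LR
          // moves SP down by 16 bytes.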
5830      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5831        return false;
5832
5833      // It's in range, so we can outline it.
5834      return true;
5835    }
5836
5837    // FIXME: Add handling for instructions like "add x0, sp, #8".
5838
5839    // We can't fix it up, so don't outline it.
5840    return false;
5841  };
5842
5843  // True if it's possible to fix up each stack instruction in this sequence.
5844  // Important for frames/call variants that modify the stack.
5845  bool AllStackInstrsSafe = std::all_of(
5846      FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5847
5848  // If the last instruction in any candidate is a terminator, then we should
5849  // tail call all of the candidates.
5850  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5851    FrameID = MachineOutlinerTailCall;
5852    NumBytesToCreateFrame = 0;
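        // The call site is then a single "b" to the outlined function: 4 bytes.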
5853    SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5854  }
5855
5856  else if (LastInstrOpcode == AArch64::BL ||
5857           (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5858    // FIXME: Do we need to check if the code after this uses the value of LR?
5859    FrameID = MachineOutlinerThunk;
5860    NumBytesToCreateFrame = 0;
5861    SetCandidateCallInfo(MachineOutlinerThunk, 4);
5862  }
5863
5864  else {
5865    // We need to decide how to emit calls + frames. We can always emit the same
5866    // frame if we don't need to save to the stack. If we have to save to the
5867    // stack, then we need a different frame.
5868    unsigned NumBytesNoStackCalls = 0;
5869    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5870
5871    // Check if we have to save LR.
5872    for (outliner::Candidate &C : RepeatedSequenceLocs) {
5873      C.initLRU(TRI);
5874
5875      // If we have a noreturn caller, then we're going to be conservative and
5876      // say that we have to save LR. If we don't have a ret at the end of the
5877      // block, then we can't reason about liveness accurately.
5878      //
5879      // FIXME: We can probably do better than always disabling this in
5880      // noreturn functions by fixing up the liveness info.
5881      bool IsNoReturn =
5882          C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
5883
5884      // Is LR available? If so, we don't need a save.
5885      if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
5886        NumBytesNoStackCalls += 4;
5887        C.setCallInfo(MachineOutlinerNoLRSave, 4);
5888        CandidatesWithoutStackFixups.push_back(C);
5889      }
5890
5891      // Is an unused register available? If so, we won't modify the stack, so
5892      // we can outline with the same frame type as those that don't save LR.
5893      else if (findRegisterToSaveLRTo(C)) {
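            // Call-site cost here is three 4-byte instructions, roughly:
            //   mov xN, x30; bl OUTLINED_FUNCTION; mov x30, xN
            // (see insertOutlinedCall's MachineOutlinerRegSave case).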
5894        NumBytesNoStackCalls += 12;
5895        C.setCallInfo(MachineOutlinerRegSave, 12);
5896        CandidatesWithoutStackFixups.push_back(C);
5897      }
5898
5899      // Is SP used in the sequence at all? If not, we don't have to modify
5900      // the stack, so we are guaranteed to get the same frame.
5901      else if (C.UsedInSequence.available(AArch64::SP)) {
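            // Also 12 bytes, but via the stack:
            //   str x30, [sp, #-16]!; bl OUTLINED_FUNCTION; ldr x30, [sp], #16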
5902        NumBytesNoStackCalls += 12;
5903        C.setCallInfo(MachineOutlinerDefault, 12);
5904        CandidatesWithoutStackFixups.push_back(C);
5905      }
5906
5907      // If we outline this, we need to modify the stack. Pretend we don't
5908      // outline it by adding all of its bytes to the cost.
5909      else {
5910        NumBytesNoStackCalls += SequenceSize;
5911      }
5912    }
5913
5914    // Keep only the candidates that avoid stack fixups if the stack
5915    // instructions can't be fixed up safely, or if avoiding fixups is no
5916    // more expensive than giving every candidate the default call type.
5917    if (!AllStackInstrsSafe ||
5918        NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5919      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5920      FrameID = MachineOutlinerNoLRSave;
5921    } else {
5922      SetCandidateCallInfo(MachineOutlinerDefault, 12);
5923    }
5924
5925    // If too few candidates are left, bail out here.
5926    if (RepeatedSequenceLocs.size() < 2) {
5927      RepeatedSequenceLocs.clear();
5928      return outliner::OutlinedFunction();
5929    }
5930  }
5931
5932  // Does every candidate's MBB contain a call? If so, then we might have a call
5933  // in the range.
5934  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5935    // Check if the range contains a call. These require a save + restore of the
5936    // link register.
5937    bool ModStackToSaveLR = false;
5938    if (std::any_of(FirstCand.front(), FirstCand.back(),
5939                    [](const MachineInstr &MI) { return MI.isCall(); }))
5940      ModStackToSaveLR = true;
5941
5942    // Handle the last instruction separately. If this is a tail call, then the
5943    // last instruction is a call. We don't want to save + restore in this case.
5944    // However, it could be possible that the last instruction is a call without
5945    // it being valid to tail call this sequence. We should consider this as
5946    // well.
5947    else if (FrameID != MachineOutlinerThunk &&
5948             FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5949      ModStackToSaveLR = true;
5950
5951    if (ModStackToSaveLR) {
5952      // We can't fix up the stack. Bail out.
5953      if (!AllStackInstrsSafe) {
5954        RepeatedSequenceLocs.clear();
5955        return outliner::OutlinedFunction();
5956      }
5957
5958      // Save + restore LR in the frame: an STRXpre/LDRXpost pair, 8 bytes.
5959      NumBytesToCreateFrame += 8;
5960    }
5961  }
5962
5963  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5964                                    NumBytesToCreateFrame, FrameID);
5965}
5966
5967bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5968    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5969  const Function &F = MF.getFunction();
5970
5971  // Can F be deduplicated by the linker? If it can, don't outline from it.
5972  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5973    return false;
5974
5975  // Don't outline from functions with section markings; the program could
5976  // expect that all the code is in the named section.
5977  // FIXME: Allow outlining from multiple functions with the same section
5978  // marking.
5979  if (F.hasSection())
5980    return false;
5981
5982  // Outlining from functions with redzones is unsafe since the outliner may
5983  // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5984  // outline from it.
5985  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5986  if (!AFI || AFI->hasRedZone().getValueOr(true))
5987    return false;
5988
5989  // It's safe to outline from MF.
5990  return true;
5991}
5992
5993bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5994                                              unsigned &Flags) const {
5995  // Check if LR is available through all of the MBB. If it's not, then set
5996  // a flag.
5997  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5998         "Suitable Machine Function for outlining must track liveness");
5999  LiveRegUnits LRU(getRegisterInfo());
6000
6001  std::for_each(MBB.rbegin(), MBB.rend(),
6002                [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
6003
6004  // Check if each of the unsafe registers is available...
6005  bool W16AvailableInBlock = LRU.available(AArch64::W16);
6006  bool W17AvailableInBlock = LRU.available(AArch64::W17);
6007  bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
6008
6009  // If all of these are dead (and not live out), we know we don't have to check
6010  // them later.
6011  if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
6012    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6013
6014  // Now, add the live outs to the set.
6015  LRU.addLiveOuts(MBB);
6016
6017  // If any of these registers is available in the MBB, but is also live out
6018  // of the block, then we know outlining is unsafe.
6019  if (W16AvailableInBlock && !LRU.available(AArch64::W16))
6020    return false;
6021  if (W17AvailableInBlock && !LRU.available(AArch64::W17))
6022    return false;
6023  if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
6024    return false;
6025
6026  // Check if there's a call inside this MachineBasicBlock. If there is, then
6027  // set a flag.
6028  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6029    Flags |= MachineOutlinerMBBFlags::HasCalls;
6030
6031  MachineFunction *MF = MBB.getParent();
6032
6033  // In the event that we outline, we may have to save LR. If there is an
6034  // available register in the MBB, then we'll always save LR there. Check if
6035  // this is true.
6036  bool CanSaveLR = false;
6037  const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6038      MF->getSubtarget().getRegisterInfo());
6039
6040  // Check if there is an available register across the sequence that we can
6041  // use.
6042  for (unsigned Reg : AArch64::GPR64RegClass) {
6043    if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
6044        Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
6045      CanSaveLR = true;
6046      break;
6047    }
6048  }
6049
6050  // Check if we have a register we can save LR to, and if LR was used
6051  // somewhere. If both of those things are true, then we need to evaluate the
6052  // safety of outlining stack instructions later.
6053  if (!CanSaveLR && !LRU.available(AArch64::LR))
6054    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6055
6056  return true;
6057}
6058
6059outliner::InstrType
6060AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
6061                                   unsigned Flags) const {
6062  MachineInstr &MI = *MIT;
6063  MachineBasicBlock *MBB = MI.getParent();
6064  MachineFunction *MF = MBB->getParent();
6065  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
6066
6067  // Don't outline anything used for return address signing. The outlined
6068  // function will get signed later if needed.
6069  switch (MI.getOpcode()) {
6070  case AArch64::PACIASP:
6071  case AArch64::PACIBSP:
6072  case AArch64::AUTIASP:
6073  case AArch64::AUTIBSP:
6074  case AArch64::RETAA:
6075  case AArch64::RETAB:
6076  case AArch64::EMITBKEY:
6077    return outliner::InstrType::Illegal;
6078  }
6079
6080  // Don't outline LOHs.
6081  if (FuncInfo->getLOHRelated().count(&MI))
6082    return outliner::InstrType::Illegal;
6083
6084  // Don't allow debug values to impact outlining type.
6085  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
6086    return outliner::InstrType::Invisible;
6087
6088  // At this point, KILL instructions don't really tell us much, so we can go
6089  // ahead and skip over them.
6090  if (MI.isKill())
6091    return outliner::InstrType::Invisible;
6092
6093  // Is this a terminator for a basic block?
6094  if (MI.isTerminator()) {
6095
6096    // Is this the end of a function?
6097    if (MI.getParent()->succ_empty())
6098      return outliner::InstrType::Legal;
6099
6100    // It's not, so don't outline it.
6101    return outliner::InstrType::Illegal;
6102  }
6103
6104  // Make sure none of the operands are un-outlinable.
6105  for (const MachineOperand &MOP : MI.operands()) {
6106    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
6107        MOP.isTargetIndex())
6108      return outliner::InstrType::Illegal;
6109
6110    // If it uses LR or W30 explicitly, then don't touch it.
6111    if (MOP.isReg() && !MOP.isImplicit() &&
6112        (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
6113      return outliner::InstrType::Illegal;
6114  }
6115
6116  // Special cases for instructions that can always be outlined, but will fail
6117  // the later tests, e.g. ADRPs, which are PC-relative, use LR, but can always
6118  // be outlined because they don't require a *specific* value to be in LR.
6119  if (MI.getOpcode() == AArch64::ADRP)
6120    return outliner::InstrType::Legal;
6121
6122  // If MI is a call we might be able to outline it. We don't want to outline
6123  // any calls that rely on the position of items on the stack. When we outline
6124  // something containing a call, we have to emit a save and restore of LR in
6125  // the outlined function. Currently, this always happens by saving LR to the
6126  // stack. Thus, if we outline, say, half the parameters for a function call
6127  // plus the call, then we'll break the callee's expectations for the layout
6128  // of the stack.
6129  //
6130  // FIXME: Allow calls to functions which construct a stack frame, as long
6131  // as they don't access arguments on the stack.
6132  // FIXME: Figure out some way to analyze functions defined in other modules.
6133  // We should be able to compute the memory usage based on the IR calling
6134  // convention, even if we can't see the definition.
6135  if (MI.isCall()) {
6136    // Get the function associated with the call. Look at each operand and find
6137    // the one that represents the callee and get its name.
6138    const Function *Callee = nullptr;
6139    for (const MachineOperand &MOP : MI.operands()) {
6140      if (MOP.isGlobal()) {
6141        Callee = dyn_cast<Function>(MOP.getGlobal());
6142        break;
6143      }
6144    }
6145
6146    // Never outline calls to mcount.  There isn't any rule that would require
6147    // this, but the Linux kernel's "ftrace" feature depends on it.
6148    if (Callee && Callee->getName() == "\01_mcount")
6149      return outliner::InstrType::Illegal;
6150
6151    // If we don't know anything about the callee, assume it depends on the
6152    // stack layout of the caller. In that case, it's only legal to outline
6153    // as a tail-call.  Whitelist the call instructions we know about so we
6154    // don't get unexpected results with call pseudo-instructions.
6155    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6156    if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
6157      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6158
6159    if (!Callee)
6160      return UnknownCallOutlineType;
6161
6162    // We have a function we have information about. Check if it's something
6163    // we can safely outline.
6164    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6165
6166    // We don't know what's going on with the callee at all. Don't touch it.
6167    if (!CalleeMF)
6168      return UnknownCallOutlineType;
6169
6170    // Check if we know anything about the callee saves on the function. If we
6171    // don't, then don't touch it, since that implies that we haven't
6172    // computed anything about its stack frame yet.
6173    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6174    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6175        MFI.getNumObjects() > 0)
6176      return UnknownCallOutlineType;
6177
6178    // At this point, we can say that CalleeMF ought to not pass anything on the
6179    // stack. Therefore, we can outline it.
6180    return outliner::InstrType::Legal;
6181  }
6182
6183  // Don't outline positions.
6184  if (MI.isPosition())
6185    return outliner::InstrType::Illegal;
6186
6187  // Don't touch the link register or W30.
6188  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
6189      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
6190    return outliner::InstrType::Illegal;
6191
6192  // Don't outline BTI instructions, because that will prevent the outlining
6193  // site from being indirectly callable.
6194  if (MI.getOpcode() == AArch64::HINT) {
6195    int64_t Imm = MI.getOperand(0).getImm();
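        // HINT #32, #34, #36 and #38 are BTI, BTI c, BTI j and BTI jc.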
6196    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
6197      return outliner::InstrType::Illegal;
6198  }
6199
6200  return outliner::InstrType::Legal;
6201}
6202
6203void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6204  for (MachineInstr &MI : MBB) {
6205    const MachineOperand *Base;
6206    unsigned Width;
6207    int64_t Offset;
6208
6209    // Is this a load or store with an immediate offset with SP as the base?
6210    if (!MI.mayLoadOrStore() ||
6211        !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
6212        (Base->isReg() && Base->getReg() != AArch64::SP))
6213      continue;
6214
6215    // It is, so we have to fix it up.
6216    unsigned Scale;
6217    int64_t Dummy1, Dummy2;
6218
6219    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
6220    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
6221    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
6222    assert(Scale != 0 && "Unexpected opcode!");
6223
6224    // We've pushed the return address to the stack, so add 16 to the offset.
6225    // This is safe, since we already checked if it would overflow when we
6226    // checked if this instruction was legal to outline.
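        // For example, an LDRXui (scale 8) that addressed [sp, #8] has
        // Offset 8, so NewImm = (8 + 16) / 8 = 3, i.e. [sp, #24].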
6227    int64_t NewImm = (Offset + 16) / Scale;
6228    StackOffsetOperand.setImm(NewImm);
6229  }
6230}
6231
6232static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
6233                                 bool ShouldSignReturnAddr,
6234                                 bool ShouldSignReturnAddrWithAKey) {
6235  if (ShouldSignReturnAddr) {
6236    MachineBasicBlock::iterator MBBPAC = MBB.begin();
6237    MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
6238    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
6239    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6240    DebugLoc DL;
6241
6242    if (MBBAUT != MBB.end())
6243      DL = MBBAUT->getDebugLoc();
6244
6245    // At the very beginning of the basic block we insert the following,
6246    // depending on the key type:
6247    //
6248    // a_key:                   b_key:
6249    //    PACIASP                   EMITBKEY
6250    //    CFI_INSTRUCTION           PACIBSP
6251    //                              CFI_INSTRUCTION
6252    if (ShouldSignReturnAddrWithAKey) {
6253      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
6254          .setMIFlag(MachineInstr::FrameSetup);
6255    } else {
6256      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
6257          .setMIFlag(MachineInstr::FrameSetup);
6258      BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
6259          .setMIFlag(MachineInstr::FrameSetup);
6260    }
6261    unsigned CFIIndex =
6262        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
6263    BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
6264        .addCFIIndex(CFIIndex)
6265        .setMIFlags(MachineInstr::FrameSetup);
6266
6267    // If v8.3a features are available, we can replace a RET instruction with
6268    // RETAA or RETAB and omit the AUT instructions.
6269    if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
6270        MBBAUT->getOpcode() == AArch64::RET) {
6271      BuildMI(MBB, MBBAUT, DL,
6272              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
6273                                                    : AArch64::RETAB))
6274          .copyImplicitOps(*MBBAUT);
6275      MBB.erase(MBBAUT);
6276    } else {
6277      BuildMI(MBB, MBBAUT, DL,
6278              TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
6279                                                    : AArch64::AUTIBSP))
6280          .setMIFlag(MachineInstr::FrameDestroy);
6281    }
6282  }
6283}
6284
6285void AArch64InstrInfo::buildOutlinedFrame(
6286    MachineBasicBlock &MBB, MachineFunction &MF,
6287    const outliner::OutlinedFunction &OF) const {
6288  // For thunk outlining, rewrite the last instruction from a call to a
6289  // tail-call.
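      // (A trailing "bl foo" becomes "b foo"; the TCRETURN pseudos built here
      // are later lowered to plain branches.)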
6290  if (OF.FrameConstructionID == MachineOutlinerThunk) {
6291    MachineInstr *Call = &*--MBB.instr_end();
6292    unsigned TailOpcode;
6293    if (Call->getOpcode() == AArch64::BL) {
6294      TailOpcode = AArch64::TCRETURNdi;
6295    } else {
6296      assert(Call->getOpcode() == AArch64::BLR);
6297      TailOpcode = AArch64::TCRETURNriALL;
6298    }
6299    MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
6300                           .add(Call->getOperand(0))
6301                           .addImm(0);
6302    MBB.insert(MBB.end(), TC);
6303    Call->eraseFromParent();
6304  }
6305
6306  bool IsLeafFunction = true;
6307
6308  // Is there a call in the outlined range?
6309  auto IsNonTailCall = [](const MachineInstr &MI) {
6310    return MI.isCall() && !MI.isReturn();
6311  };
6312
6313  if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
6314    // Fix up the instructions in the range, since we're going to modify the
6315    // stack.
6316    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6317           "Can only fix up stack references once");
6318    fixupPostOutline(MBB);
6319
6320    IsLeafFunction = false;
6321
6322    // LR has to be a live-in so that we can save it.
6323    MBB.addLiveIn(AArch64::LR);
6324
6325    MachineBasicBlock::iterator It = MBB.begin();
6326    MachineBasicBlock::iterator Et = MBB.end();
6327
6328    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6329        OF.FrameConstructionID == MachineOutlinerThunk)
6330      Et = std::prev(MBB.end());
6331
6332    // Insert a save before the outlined region
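        // (This materializes as the pre-indexed store "str x30, [sp, #-16]!".)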
6333    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6334                                .addReg(AArch64::SP, RegState::Define)
6335                                .addReg(AArch64::LR)
6336                                .addReg(AArch64::SP)
6337                                .addImm(-16);
6338    It = MBB.insert(It, STRXpre);
6339
6340    const TargetSubtargetInfo &STI = MF.getSubtarget();
6341    const MCRegisterInfo *MRI = STI.getRegisterInfo();
6342    unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
6343
6344    // Add a CFI saying the stack was moved 16 B down.
6345    int64_t StackPosEntry =
6346        MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
6347    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6348        .addCFIIndex(StackPosEntry)
6349        .setMIFlags(MachineInstr::FrameSetup);
6350
6351    // Add a CFI saying that the LR that we want to find is now 16 B higher than
6352    // before.
6353    int64_t LRPosEntry =
6354        MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
6355    BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6356        .addCFIIndex(LRPosEntry)
6357        .setMIFlags(MachineInstr::FrameSetup);
6358
6359    // Insert a restore before the terminator for the function.
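        // (The post-indexed load "ldr x30, [sp], #16".)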
6360    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6361                                 .addReg(AArch64::SP, RegState::Define)
6362                                 .addReg(AArch64::LR, RegState::Define)
6363                                 .addReg(AArch64::SP)
6364                                 .addImm(16);
6365    Et = MBB.insert(Et, LDRXpost);
6366  }
6367
6368  // If multiple candidates reach this point, they must agree on their return
6369  // address signing. It is therefore enough to consider the signing behaviour
6370  // of one of them.
6371  const Function &CF = OF.Candidates.front().getMF()->getFunction();
6372  bool ShouldSignReturnAddr = false;
6373  if (CF.hasFnAttribute("sign-return-address")) {
6374    StringRef Scope =
6375        CF.getFnAttribute("sign-return-address").getValueAsString();
6376    if (Scope.equals("all"))
6377      ShouldSignReturnAddr = true;
6378    else if (Scope.equals("non-leaf") && !IsLeafFunction)
6379      ShouldSignReturnAddr = true;
6380  }
6381
6382  // a_key is the default
6383  bool ShouldSignReturnAddrWithAKey = true;
6384  if (CF.hasFnAttribute("sign-return-address-key")) {
6385    const StringRef Key =
6386        CF.getFnAttribute("sign-return-address-key").getValueAsString();
6387    // Key can be either a_key or b_key.
6388    assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
6389           "Return address signing key must be either a_key or b_key");
6390    ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
6391  }
6392
6393  // If this is a tail call outlined function, then there's already a return.
6394  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6395      OF.FrameConstructionID == MachineOutlinerThunk) {
6396    signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6397                         ShouldSignReturnAddrWithAKey);
6398    return;
6399  }
6400
6401  // It's not a tail call, so we have to insert the return ourselves.
6402  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
6403                          .addReg(AArch64::LR, RegState::Undef);
6404  MBB.insert(MBB.end(), ret);
6405
6406  signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6407                       ShouldSignReturnAddrWithAKey);
6408
6409  // Did we have to modify the stack by saving the link register?
6410  if (OF.FrameConstructionID != MachineOutlinerDefault)
6411    return;
6412
6413  // We modified the stack.
6414  // Walk over the basic block and fix up all the stack accesses.
6415  fixupPostOutline(MBB);
6416}
6417
6418MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
6419    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6420    MachineFunction &MF, const outliner::Candidate &C) const {
6421
6422  // Are we tail calling?
6423  if (C.CallConstructionID == MachineOutlinerTailCall) {
6424    // If yes, then we can just branch to the label.
6425    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
6426                            .addGlobalAddress(M.getNamedValue(MF.getName()))
6427                            .addImm(0));
6428    return It;
6429  }
6430
6431  // Are we saving the link register?
6432  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6433      C.CallConstructionID == MachineOutlinerThunk) {
6434    // No, so just insert the call.
6435    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6436                            .addGlobalAddress(M.getNamedValue(MF.getName())));
6437    return It;
6438  }
6439
6440  // We want to return the spot where we inserted the call.
6441  MachineBasicBlock::iterator CallPt;
6442
6443  // Instructions for saving and restoring LR around the call instruction we're
6444  // going to insert.
6445  MachineInstr *Save;
6446  MachineInstr *Restore;
6447  // Can we save to a register?
6448  if (C.CallConstructionID == MachineOutlinerRegSave) {
6449    // FIXME: This logic should be sunk into a target-specific interface so that
6450    // we don't have to recompute the register.
6451    unsigned Reg = findRegisterToSaveLRTo(C);
6452    assert(Reg != 0 && "No callee-saved register available?");
6453
6454    // Save and restore LR from that register.
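        // ("orr xN, xzr, x30" is the canonical encoding of "mov xN, x30".)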
6455    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
6456               .addReg(AArch64::XZR)
6457               .addReg(AArch64::LR)
6458               .addImm(0);
6459    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
6460                .addReg(AArch64::XZR)
6461                .addReg(Reg)
6462                .addImm(0);
6463  } else {
6464    // We have the default case. Save and restore from SP.
6465    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6466               .addReg(AArch64::SP, RegState::Define)
6467               .addReg(AArch64::LR)
6468               .addReg(AArch64::SP)
6469               .addImm(-16);
6470    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6471                  .addReg(AArch64::SP, RegState::Define)
6472                  .addReg(AArch64::LR, RegState::Define)
6473                  .addReg(AArch64::SP)
6474                  .addImm(16);
6475  }
6476
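      // In the default case the emitted call site is thus (a sketch; the
      // outliner names its functions OUTLINED_FUNCTION_<N>):
      //     str x30, [sp, #-16]!
      //     bl  OUTLINED_FUNCTION_0
      //     ldr x30, [sp], #16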
6477  It = MBB.insert(It, Save);
6478  It++;
6479
6480  // Insert the call.
6481  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
6482                          .addGlobalAddress(M.getNamedValue(MF.getName())));
6483  CallPt = It;
6484  It++;
6485
6486  It = MBB.insert(It, Restore);
6487  return CallPt;
6488}
6489
6490bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
6491  MachineFunction &MF) const {
6492  return MF.getFunction().hasMinSize();
6493}
6494
6495Optional<DestSourcePair>
6496AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
6497
6498  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and
6499  // a zero immediate are used as aliases for the mov instruction.
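      // e.g. "mov w0, w1" is encoded as "orr w0, wzr, w1" (shift #0).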
6500  if (MI.getOpcode() == AArch64::ORRWrs &&
6501      MI.getOperand(1).getReg() == AArch64::WZR &&
6502      MI.getOperand(3).getImm() == 0x0) {
6503    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6504  }
6505
6506  if (MI.getOpcode() == AArch64::ORRXrs &&
6507      MI.getOperand(1).getReg() == AArch64::XZR &&
6508      MI.getOperand(3).getImm() == 0x0) {
6509    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
6510  }
6511
6512  return None;
6513}
6514
6515Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
6516                                                      Register Reg) const {
6517  int Sign = 1;
6518  int64_t Offset = 0;
6519
6520  // TODO: Handle cases where Reg is a super- or sub-register of the
6521  // destination register.
6522  if (Reg != MI.getOperand(0).getReg())
6523    return None;
6524
6525  switch (MI.getOpcode()) {
6526  default:
6527    return None;
6528  case AArch64::SUBWri:
6529  case AArch64::SUBXri:
6530  case AArch64::SUBSWri:
6531  case AArch64::SUBSXri:
6532    Sign *= -1;
6533    LLVM_FALLTHROUGH;
6534  case AArch64::ADDSWri:
6535  case AArch64::ADDSXri:
6536  case AArch64::ADDWri:
6537  case AArch64::ADDXri: {
6538    // TODO: Third operand can be global address (usually some string).
6539    if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
6540        !MI.getOperand(2).isImm())
6541      return None;
6542    Offset = MI.getOperand(2).getImm() * Sign;
6543    int Shift = MI.getOperand(3).getImm();
6544    assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
6545    Offset = Offset << Shift;
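        // e.g. "add x0, x1, #3, lsl #12" yields Offset = 3 << 12 = 12288.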
6546  }
6547  }
6548  return RegImmPair{MI.getOperand(1).getReg(), Offset};
6549}
6550
6551/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
6552/// the destination register then, if possible, describe the value in terms of
6553/// the source register.
6554static Optional<ParamLoadedValue>
6555describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
6556                       const TargetInstrInfo *TII,
6557                       const TargetRegisterInfo *TRI) {
6558  auto DestSrc = TII->isCopyInstr(MI);
6559  if (!DestSrc)
6560    return None;
6561
6562  Register DestReg = DestSrc->Destination->getReg();
6563  Register SrcReg = DestSrc->Source->getReg();
6564
6565  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
6566
6567  // If the described register is the destination, just return the source.
6568  if (DestReg == DescribedReg)
6569    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6570
6571  // ORRWrs zero-extends to 64 bits, so we need to consider such cases.
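      // (A write to a W register zeroes the upper 32 bits of the corresponding
      // X register, so describing the 64-bit super-register in terms of the
      // 32-bit source is sound.)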
6572  if (MI.getOpcode() == AArch64::ORRWrs &&
6573      TRI->isSuperRegister(DestReg, DescribedReg))
6574    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
6575
6576  // We may need to describe the lower part of an ORRXrs move.
6577  if (MI.getOpcode() == AArch64::ORRXrs &&
6578      TRI->isSubRegister(DestReg, DescribedReg)) {
6579    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
6580    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
6581  }
6582
6583  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
6584         "Unhandled ORR[XW]rs copy case");
6585
6586  return None;
6587}
6588
6589Optional<ParamLoadedValue>
6590AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
6591                                      Register Reg) const {
6592  const MachineFunction *MF = MI.getMF();
6593  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
6594  switch (MI.getOpcode()) {
6595  case AArch64::MOVZWi:
6596  case AArch64::MOVZXi: {
6597    // MOVZWi may be used for producing zero-extended 32-bit immediates in
6598    // 64-bit parameters, so we need to consider super-registers.
6599    if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
6600      return None;
6601
6602    if (!MI.getOperand(1).isImm())
6603      return None;
6604    int64_t Immediate = MI.getOperand(1).getImm();
6605    int Shift = MI.getOperand(2).getImm();
6606    return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
6607                            nullptr);
6608  }
6609  case AArch64::ORRWrs:
6610  case AArch64::ORRXrs:
6611    return describeORRLoadedValue(MI, Reg, this, TRI);
6612  }
6613
6614  return TargetInstrInfo::describeLoadedValue(MI, Reg);
6615}
6616
6617#define GET_INSTRINFO_HELPERS
6618#include "AArch64GenInstrInfo.inc"
6619