// ARMISelDAGToDAG.cpp — snapshot at LLVM revision 360784
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
16#include "MCTargetDesc/ARMAddressingModes.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Target/TargetOptions.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41
// Debugging escape hatch: when set, none of the shifter-operand complex
// patterns below (SelectImmShifterOperand / SelectRegShifterOperand) match,
// which is useful for isolating isel problems to the shifter-op paths.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
46
47//===--------------------------------------------------------------------===//
48/// ARMDAGToDAGISel - ARM specific code to select ARM machine
49/// instructions for SelectionDAG operations.
50///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  // Complex-pattern callbacks referenced from the TableGen-generated matcher
  // included below; each matches one addressing-mode / operand form.
  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // N is known to be a constant predicate operand; emit it as a target
  // constant together with the CPSR register the conditional move reads.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate predicates used by TableGen patterns: test whether Imm (or its
  // bitwise complement) is encodable as an ARM / Thumb-2 modified immediate.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace
325
326/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
327/// operand. If so Imm will receive the 32-bit value.
328static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
329  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
330    Imm = cast<ConstantSDNode>(N)->getZExtValue();
331    return true;
332  }
333  return false;
334}
335
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
341
342// isOpcWithIntImmediate - This method tests to see if the node is a specific
343// opcode and that it has a immediate integer right operand.
344// If so Imm will receive the 32 bit value.
345static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
346  return N->getOpcode() == Opc &&
347         isInt32Immediate(N->getOperand(1).getNode(), Imm);
348}
349
350/// Check whether a particular node is a constant value representable as
351/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
352///
353/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
354static bool isScaledConstantInRange(SDValue Node, int Scale,
355                                    int RangeMin, int RangeMax,
356                                    int &ScaledConstant) {
357  assert(Scale > 0 && "Invalid scale!");
358
359  // Check that this is a constant.
360  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
361  if (!C)
362    return false;
363
364  ScaledConstant = (int) C->getZExtValue();
365  if ((ScaledConstant % Scale) != 0)
366    return false;
367
368  ScaledConstant /= Scale;
369  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
370}
371
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below is aimed at forming UBFX-style bit extractions, which
  // require the v6T2 instruction set.
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): isThumb() rather than isThumb2() — presumably equivalent
  // under the hasV6T2Ops() guard above; confirm before reusing elsewhere.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so that N1 is the AND-with-immediate, if either side is.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros, the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
454
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 don't model the hazard; always allow the fused form.
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // With multiple users we can't tell which consumer would create the
  // hazard, so be conservative.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
499
500bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
501                                            ARM_AM::ShiftOpc ShOpcVal,
502                                            unsigned ShAmt) {
503  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
504    return true;
505  if (Shift.hasOneUse())
506    return true;
507  // R << 2 is free.
508  return ShOpcVal == ARM_AM::lsl &&
509         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
510}
511
512bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
513                                             unsigned MaxShift,
514                                             unsigned &PowerOfTwo,
515                                             SDValue &NewMulConst) const {
516  assert(N.getOpcode() == ISD::MUL);
517  assert(MaxShift > 0);
518
519  // If the multiply is used in more than one place then changing the constant
520  // will make other uses incorrect, so don't.
521  if (!N.hasOneUse()) return false;
522  // Check if the multiply is by a constant
523  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
524  if (!MulConst) return false;
525  // If the constant is used in more than one place then modifying it will mean
526  // we need to materialize two constants instead of one, which is a bad idea.
527  if (!MulConst->hasOneUse()) return false;
528  unsigned MulConstVal = MulConst->getZExtValue();
529  if (MulConstVal == 0) return false;
530
531  // Find the largest power of 2 that MulConstVal is a multiple of
532  PowerOfTwo = MaxShift;
533  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
534    --PowerOfTwo;
535    if (PowerOfTwo == 0) return false;
536  }
537
538  // Only optimise if the new cost is better
539  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
540  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
541  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
542  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
543  return NewCost < OldCost;
544}
545
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // NOTE(review): RepositionNode moves M to N's slot in the DAG's node list,
  // presumably so the selection walk visits M where it would have visited N
  // — confirm before changing.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
550
/// SelectImmShifterOperand - Match a register shifted by a constant amount;
/// BaseReg receives the value register and Opc the encoded shift-op/amount.
/// Note that CheckProfitability is not consulted in this form (only the
/// register-shift variant checks it).
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N); // keep N alive across the replacement below
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Only a constant shift amount is accepted in this (immediate) form.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
589
/// SelectRegShifterOperand - Match a register shifted by an amount held in
/// another register; BaseReg/ShReg/Opc receive the value register, the
/// shift-amount register and the encoded shift kind.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // A constant shift amount belongs to the immediate form
  // (SelectImmShifterOperand), so reject it here.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
616
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR behaves exactly like ADD when the two operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
624
625
/// SelectAddrModeImm12 - Match a base register plus a signed 12-bit immediate
/// offset.  Always succeeds, falling back to "Base = N, offset 0".
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, except when it wraps one of the target
    // symbol kinds that must stay wrapped.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Fold a subtracted constant as a negative offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
676
677
678
/// SelectLdStSOReg - Match an addrmode2 register-offset operand for a load or
/// store: base register plus an (optionally shifted) offset register, with
/// the add/sub direction and shift encoded into Opc.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Use the same register as both base and (shifted) offset.
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset); // keep Offset alive across the replacement
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
786
/// Select a register (possibly shifted) offset operand for an AM2
/// pre/post-indexed load or store. Immediate offsets are deliberately
/// rejected so the *_IMM instruction forms handle them instead (see
/// SelectAddrMode2OffsetImm / SelectAddrMode2OffsetImmPre).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment modes add the offset to the base; decrement modes subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A constant that fits in 12 bits is better served by the immediate forms.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Not profitable to fold the shift; use the whole value unshifted.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Pack add/sub direction, shift amount and shift kind into the single
  // AM2 opcode immediate operand.
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
822
823bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
824                                            SDValue &Offset, SDValue &Opc) {
825  unsigned Opcode = Op->getOpcode();
826  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
827    ? cast<LoadSDNode>(Op)->getAddressingMode()
828    : cast<StoreSDNode>(Op)->getAddressingMode();
829  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
830    ? ARM_AM::add : ARM_AM::sub;
831  int Val;
832  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
833    if (AddSub == ARM_AM::sub) Val *= -1;
834    Offset = CurDAG->getRegister(0, MVT::i32);
835    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
836    return true;
837  }
838
839  return false;
840}
841
842
843bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
844                                            SDValue &Offset, SDValue &Opc) {
845  unsigned Opcode = Op->getOpcode();
846  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
847    ? cast<LoadSDNode>(Op)->getAddressingMode()
848    : cast<StoreSDNode>(Op)->getAddressingMode();
849  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
850    ? ARM_AM::add : ARM_AM::sub;
851  int Val;
852  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
853    Offset = CurDAG->getRegister(0, MVT::i32);
854    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
855                                                      ARM_AM::no_shift),
856                                    SDLoc(Op), MVT::i32);
857    return true;
858  }
859
860  return false;
861}
862
863bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
864  Base = N;
865  return true;
866}
867
/// Select an AM3 address (base register + register or imm8 offset), used by
/// halfword and signed-byte loads/stores. Always succeeds: the worst case is
/// base = N with a zero offset.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C  is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No foldable offset at all: use N itself as the base.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Negative constants are encoded as a positive magnitude with the sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant offset out of imm8 range: fall back to reg + reg.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
921
/// Select the offset operand of an AM3 pre/post-indexed load/store: either
/// an imm8 folded into the AM3 opcode, or a plain register offset. Always
/// succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Not an encodable immediate: use N as a register offset.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
943
/// Shared worker for AM5 (VFP load/store) address selection: base register
/// plus a scaled imm8 offset. The scale is 4 for full AM5 and 2 for the FP16
/// variant. Always succeeds; falls back to base = N with a zero offset.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper for operands that are already target nodes.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  // FP16 offsets are multiples of 2; full AM5 offsets are multiples of 4.
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Negative constants become positive magnitudes with the sub bit set.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Offset not encodable: use the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
1002
1003bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1004                                      SDValue &Base, SDValue &Offset) {
1005  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1006}
1007
1008bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1009                                          SDValue &Base, SDValue &Offset) {
1010  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1011}
1012
/// Select an AM6 (NEON vector load/store) address: the address itself plus an
/// alignment operand derived from the parent memory node. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1041
1042bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1043                                            SDValue &Offset) {
1044  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1045  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1046  if (AM != ISD::POST_INC)
1047    return false;
1048  Offset = N;
1049  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1050    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1051      Offset = CurDAG->getRegister(0, MVT::i32);
1052  }
1053  return true;
1054}
1055
1056bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1057                                       SDValue &Offset, SDValue &Label) {
1058  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1059    Offset = N.getOperand(0);
1060    SDValue N1 = N.getOperand(1);
1061    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1062                                      SDLoc(N), MVT::i32);
1063    return true;
1064  }
1065
1066  return false;
1067}
1068
1069
1070//===----------------------------------------------------------------------===//
1071//                         Thumb Addressing Modes
1072//===----------------------------------------------------------------------===//
1073
1074static bool shouldUseZeroOffsetLdSt(SDValue N) {
1075  // Negative numbers are difficult to materialise in thumb1. If we are
1076  // selecting the add of a negative, instead try to select ri with a zero
1077  // offset, so create the add node directly which will become a sub.
1078  if (N.getOpcode() != ISD::ADD)
1079    return false;
1080
1081  // Look for an imm which is not legal for ld/st, but is legal for sub.
1082  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1083    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1084
1085  return false;
1086}
1087
1088bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1089                                                SDValue &Offset) {
1090  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1091    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1092    if (!NC || !NC->isNullValue())
1093      return false;
1094
1095    Base = Offset = N;
1096    return true;
1097  }
1098
1099  Base = N.getOperand(0);
1100  Offset = N.getOperand(1);
1101  return true;
1102}
1103
1104bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1105                                            SDValue &Offset) {
1106  if (shouldUseZeroOffsetLdSt(N))
1107    return false; // Select ri instead
1108  return SelectThumbAddrModeRRSext(N, Base, Offset);
1109}
1110
/// Select a Thumb base + scaled imm5 address (tLDR/tSTR family). Scale is the
/// access size in bytes (1, 2 or 4). Fails when a register-offset form or the
/// ri-with-zero-offset form should be used instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // The negative offset will be selected as an explicit sub; use a plain
    // base with zero offset here.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper around already-target nodes.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1148
1149bool
1150ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1151                                           SDValue &OffImm) {
1152  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1153}
1154
1155bool
1156ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1157                                           SDValue &OffImm) {
1158  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1159}
1160
1161bool
1162ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1163                                           SDValue &OffImm) {
1164  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1165}
1166
/// Select a Thumb SP-relative address: a frame index plus an optional imm8
/// offset scaled by 4. May bump the frame object's alignment to 4 so the
/// scaled offset encoding stays valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        // Fixed objects cannot have their alignment raised, so re-check.
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1212
/// Select a Thumb base +/- imm7<<Shift address (MVE narrowing forms). Always
/// succeeds; the fallback is base = N with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      // A SUB encodes the negated constant.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      // The operand carries the un-scaled byte offset.
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1234
1235
1236//===----------------------------------------------------------------------===//
1237//                        Thumb 2 Addressing Modes
1238//===----------------------------------------------------------------------===//
1239
1240
/// Select a Thumb2 base + unsigned imm12 address (t2LDRi12 and friends).
/// Rejects (R - imm8), which t2LDRi8 handles, and constant pool addresses,
/// which t2LDRpci handles.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper around already-target nodes.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1296
/// Select a Thumb2 base + negative imm8 address (t2LDRi8 and friends). Only
/// matches offsets in [-255, 0); non-negative constants belong to the imm12
/// form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A SUB encodes the negated constant.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1323
1324bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1325                                                 SDValue &OffImm){
1326  unsigned Opcode = Op->getOpcode();
1327  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1328    ? cast<LoadSDNode>(Op)->getAddressingMode()
1329    : cast<StoreSDNode>(Op)->getAddressingMode();
1330  int RHSC;
1331  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1332    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1333      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1334      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1335    return true;
1336  }
1337
1338  return false;
1339}
1340
/// Select a Thumb2/MVE base +/- imm7<<Shift address. Always succeeds; the
/// fallback is base = N with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // A SUB encodes the negated constant.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      // The operand carries the un-scaled byte offset.
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1368
1369template <unsigned Shift>
1370bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1371                                                 SDValue &OffImm) {
1372  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1373}
1374
/// Select an imm7<<Shift offset for a Thumb2/MVE pre/post-indexed memory op.
/// Handles plain and masked loads/stores; the constant is negated for
/// decrement addressing modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
1409
1410template <int Min, int Max>
1411bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1412  int Val;
1413  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1414    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1415    return true;
1416  }
1417  return false;
1418}
1419
/// Select a Thumb2 (R + (R << [0,3])) address. Constant offsets that fit the
/// imm12/imm8 forms are rejected so t2LDRi12/t2LDRi8 can match them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 can be encoded; larger ones stay unfolded.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1479
/// Select the address for Thumb2 exclusive (ldrex/strex) accesses: a base
/// plus an offset that is a multiple of 4 in [0, 1020], encoded divided by 4.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // Reject offsets the instruction cannot encode; the caller falls back to
  // base-only (the add stays a separate instruction).
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  // The operand holds the word offset (bytes / 4).
  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1508
1509//===--------------------------------------------------------------------===//
1510
1511/// getAL - Returns a ARMCC::AL immediate node.
1512static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1513  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1514}
1515
1516void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1517  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1518  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1519}
1520
/// Try to select an ARM-mode pre/post-indexed load. Picks the LDR/LDRB/LDRH/
/// LDRSB/LDRSH variant matching the loaded type, extension kind and the
/// addressing mode that can be folded, then replaces N with the machine node.
/// Returns false if no indexed form matches.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // i32: prefer the immediate forms; fall back to register offset.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // i16 always uses addrmode3 (LDRH/LDRSH).
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Sign-extending byte loads need LDRSB, which is addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      // Zero-extending byte loads use LDRB (addrmode2), same preference
      // order as the i32 case above.
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // *_PRE_IMM forms have no separate offset-register operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1599
1600bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1601  LoadSDNode *LD = cast<LoadSDNode>(N);
1602  EVT LoadedVT = LD->getMemoryVT();
1603  ISD::MemIndexedMode AM = LD->getAddressingMode();
1604  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1605      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1606    return false;
1607
1608  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1609  if (!COffs || COffs->getZExtValue() != 4)
1610    return false;
1611
1612  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1613  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1614  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1615  // ISel.
1616  SDValue Chain = LD->getChain();
1617  SDValue Base = LD->getBasePtr();
1618  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1619                   CurDAG->getRegister(0, MVT::i32), Chain };
1620  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1621                                       MVT::i32, MVT::Other, Ops);
1622  transferMemOperands(N, New);
1623  ReplaceNode(N, New);
1624  return true;
1625}
1626
1627bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1628  LoadSDNode *LD = cast<LoadSDNode>(N);
1629  ISD::MemIndexedMode AM = LD->getAddressingMode();
1630  if (AM == ISD::UNINDEXED)
1631    return false;
1632
1633  EVT LoadedVT = LD->getMemoryVT();
1634  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1635  SDValue Offset;
1636  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1637  unsigned Opcode = 0;
1638  bool Match = false;
1639  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1640    switch (LoadedVT.getSimpleVT().SimpleTy) {
1641    case MVT::i32:
1642      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1643      break;
1644    case MVT::i16:
1645      if (isSExtLd)
1646        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1647      else
1648        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1649      break;
1650    case MVT::i8:
1651    case MVT::i1:
1652      if (isSExtLd)
1653        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1654      else
1655        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1656      break;
1657    default:
1658      return false;
1659    }
1660    Match = true;
1661  }
1662
1663  if (Match) {
1664    SDValue Chain = LD->getChain();
1665    SDValue Base = LD->getBasePtr();
1666    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1667                     CurDAG->getRegister(0, MVT::i32), Chain };
1668    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1669                                         MVT::Other, Ops);
1670    transferMemOperands(N, New);
1671    ReplaceNode(N, New);
1672    return true;
1673  }
1674
1675  return false;
1676}
1677
/// Try to select an MVE pre- or post-indexed vector load. Handles both plain
/// LoadSDNodes and MaskedLoadSDNodes (the latter become VPT "then"-predicated
/// VLDRs using the mask). Returns true and replaces \p N on success.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  unsigned Align;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unpredicated load: no VPT predication, no mask register.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Align = LD->getAlignment();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked load: "then"-predicated on the load's mask.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick a VLDR variant whose element width, alignment requirement and
  // imm7 offset range (scaled by the element size argument) all fit; the
  // first group are the widening (8->16, 8->32, 16->32) variants.
  SDValue NewOffset;
  if (Align >= 2 && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Align >= 4 &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Align >= 2 &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  // The ISD results (value, writeback, chain) are mapped onto machine-node
  // results (1, 0, 2), i.e. the instruction's first def is the writeback.
  // NOTE(review): the value-type list passed to getMachineNode above looks
  // swapped relative to that mapping — confirm it matches the _pre/_post
  // instruction out-operand order in the .td.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1774
1775/// Form a GPRPair pseudo register from a pair of GPR regs.
1776SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1777  SDLoc dl(V0.getNode());
1778  SDValue RegClass =
1779    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1780  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1781  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1782  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1783  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1784}
1785
1786/// Form a D register from a pair of S registers.
1787SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1788  SDLoc dl(V0.getNode());
1789  SDValue RegClass =
1790    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1791  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1792  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1793  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1794  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1795}
1796
1797/// Form a quad register from a pair of D registers.
1798SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1799  SDLoc dl(V0.getNode());
1800  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1801                                               MVT::i32);
1802  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1803  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1804  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1805  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1806}
1807
1808/// Form 4 consecutive D registers from a pair of Q registers.
1809SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1810  SDLoc dl(V0.getNode());
1811  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1812                                               MVT::i32);
1813  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1814  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1815  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1816  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1817}
1818
1819/// Form 4 consecutive S registers.
1820SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1821                                   SDValue V2, SDValue V3) {
1822  SDLoc dl(V0.getNode());
1823  SDValue RegClass =
1824    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1825  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1826  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1827  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1828  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1829  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1830                                    V2, SubReg2, V3, SubReg3 };
1831  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1832}
1833
1834/// Form 4 consecutive D registers.
1835SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1836                                   SDValue V2, SDValue V3) {
1837  SDLoc dl(V0.getNode());
1838  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1839                                               MVT::i32);
1840  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1841  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1842  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1843  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1844  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1845                                    V2, SubReg2, V3, SubReg3 };
1846  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1847}
1848
1849/// Form 4 consecutive Q registers.
1850SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1851                                   SDValue V2, SDValue V3) {
1852  SDLoc dl(V0.getNode());
1853  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1854                                               MVT::i32);
1855  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1856  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1857  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1858  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1859  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1860                                    V2, SubReg2, V3, SubReg3 };
1861  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1862}
1863
1864/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1865/// of a NEON VLD or VST instruction.  The supported values depend on the
1866/// number of registers being loaded.
1867SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1868                                       unsigned NumVecs, bool is64BitVector) {
1869  unsigned NumRegs = NumVecs;
1870  if (!is64BitVector && NumVecs < 3)
1871    NumRegs *= 2;
1872
1873  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1874  if (Alignment >= 32 && NumRegs == 4)
1875    Alignment = 32;
1876  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1877    Alignment = 16;
1878  else if (Alignment >= 8)
1879    Alignment = 8;
1880  else
1881    Alignment = 0;
1882
1883  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1884}
1885
1886static bool isVLDfixed(unsigned Opc)
1887{
1888  switch (Opc) {
1889  default: return false;
1890  case ARM::VLD1d8wb_fixed : return true;
1891  case ARM::VLD1d16wb_fixed : return true;
1892  case ARM::VLD1d64Qwb_fixed : return true;
1893  case ARM::VLD1d32wb_fixed : return true;
1894  case ARM::VLD1d64wb_fixed : return true;
1895  case ARM::VLD1d64TPseudoWB_fixed : return true;
1896  case ARM::VLD1d64QPseudoWB_fixed : return true;
1897  case ARM::VLD1q8wb_fixed : return true;
1898  case ARM::VLD1q16wb_fixed : return true;
1899  case ARM::VLD1q32wb_fixed : return true;
1900  case ARM::VLD1q64wb_fixed : return true;
1901  case ARM::VLD1DUPd8wb_fixed : return true;
1902  case ARM::VLD1DUPd16wb_fixed : return true;
1903  case ARM::VLD1DUPd32wb_fixed : return true;
1904  case ARM::VLD1DUPq8wb_fixed : return true;
1905  case ARM::VLD1DUPq16wb_fixed : return true;
1906  case ARM::VLD1DUPq32wb_fixed : return true;
1907  case ARM::VLD2d8wb_fixed : return true;
1908  case ARM::VLD2d16wb_fixed : return true;
1909  case ARM::VLD2d32wb_fixed : return true;
1910  case ARM::VLD2q8PseudoWB_fixed : return true;
1911  case ARM::VLD2q16PseudoWB_fixed : return true;
1912  case ARM::VLD2q32PseudoWB_fixed : return true;
1913  case ARM::VLD2DUPd8wb_fixed : return true;
1914  case ARM::VLD2DUPd16wb_fixed : return true;
1915  case ARM::VLD2DUPd32wb_fixed : return true;
1916  }
1917}
1918
1919static bool isVSTfixed(unsigned Opc)
1920{
1921  switch (Opc) {
1922  default: return false;
1923  case ARM::VST1d8wb_fixed : return true;
1924  case ARM::VST1d16wb_fixed : return true;
1925  case ARM::VST1d32wb_fixed : return true;
1926  case ARM::VST1d64wb_fixed : return true;
1927  case ARM::VST1q8wb_fixed : return true;
1928  case ARM::VST1q16wb_fixed : return true;
1929  case ARM::VST1q32wb_fixed : return true;
1930  case ARM::VST1q64wb_fixed : return true;
1931  case ARM::VST1d64TPseudoWB_fixed : return true;
1932  case ARM::VST1d64QPseudoWB_fixed : return true;
1933  case ARM::VST2d8wb_fixed : return true;
1934  case ARM::VST2d16wb_fixed : return true;
1935  case ARM::VST2d32wb_fixed : return true;
1936  case ARM::VST2q8PseudoWB_fixed : return true;
1937  case ARM::VST2q16PseudoWB_fixed : return true;
1938  case ARM::VST2q32PseudoWB_fixed : return true;
1939  }
1940}
1941
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The "_fixed" forms implicitly advance the base pointer by the access size
// ("[rN]!"); the "_register" forms take an explicit increment register
// ("[rN], rM"). Opcodes not in the table are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 single- and multi-register forms.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1 duplicating (all-lanes) forms.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  // VST1 forms.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2 duplicating (all-lanes) forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1999
2000/// Returns true if the given increment is a Constant known to be equal to the
2001/// access size performed by a NEON load/store. This means the "[rN]!" form can
2002/// be used.
2003static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2004  auto C = dyn_cast<ConstantSDNode>(Inc);
2005  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2006}
2007
/// Select a NEON VLD1-VLD4 node (intrinsic or updating form). \p NumVecs is
/// the number of vectors loaded. The opcode tables are indexed by element
/// size: DOpcodes for 64-bit vectors, QOpcodes0/QOpcodes1 for 128-bit ones
/// (for VLD3/VLD4, QOpcodes0 loads the even D subregs and QOpcodes1 the odd).
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine node yields all loaded vectors as one super-register value:
  // VLD3 uses a 4-register superreg, and quad vectors take twice as many
  // i64 elements as double vectors.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Writeback (updated base address).
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));  // Updated address from the first load.
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));  // Partially-filled super-register.
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Rewire the chain (and, for updating nodes, the writeback result).
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2146
/// Select a NEON VST1-VST4 node (intrinsic or updating form). \p NumVecs is
/// the number of vectors stored. The opcode tables are indexed by element
/// size: DOpcodes for 64-bit vectors, QOpcodes0 for directly-supported
/// 128-bit ops (and the even-subreg half of VST3/VST4), QOpcodes1 for the
/// odd-subreg half of the two-instruction VST3/VST4 sequences.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // Writeback (updated base address).
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));  // Updated address from the first store.
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2297
/// Select a NEON load-lane or store-lane operation (VLD2LN..VLD4LN /
/// VST2LN..VST4LN), optionally with a post-increment address update.
/// \p IsLoad distinguishes loads from stores, \p isUpdating selects the
/// post-incrementing form, \p NumVecs is the number of vectors involved
/// (2-4), and \p DOpcodes / \p QOpcodes are opcode tables indexed by
/// element size for 64-bit and 128-bit vector types respectively.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane index is the operand immediately after the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to something encodable: no larger than the total
  // access size, at least 8 bits, and a power of two; 0 means "unaligned".
  // The NumVecs == 3 forms never encode an alignment.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element type to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Loads produce one wide i64-element tuple covering all the vectors (the
  // 3-vector case is rounded up to 4 registers), then the optional
  // write-back value, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // A "perfect" increment (one whole access size) is implicit in the
    // instruction; Reg0 in the increment slot requests that form.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the vector operands into a single super-register tuple; for
  // the 3-vector case the fourth slot is filled with IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // The chain result follows the vectors; the write-back value (updating
  // forms only) follows the chain.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2424
2425template <typename SDValueVector>
2426void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2427                                           SDValue PredicateMask) {
2428  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2429  Ops.push_back(PredicateMask);
2430}
2431
2432template <typename SDValueVector>
2433void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2434                                           SDValue PredicateMask,
2435                                           SDValue Inactive) {
2436  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2437  Ops.push_back(PredicateMask);
2438  Ops.push_back(Inactive);
2439}
2440
2441template <typename SDValueVector>
2442void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2443  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2444  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2445}
2446
2447template <typename SDValueVector>
2448void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2449                                                EVT InactiveTy) {
2450  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2451  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2452  Ops.push_back(SDValue(
2453      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2454}
2455
2456void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2457                                   bool Predicated) {
2458  SDLoc Loc(N);
2459  SmallVector<SDValue, 8> Ops;
2460
2461  uint16_t Opcode;
2462  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2463  case 32:
2464    Opcode = Opcodes[0];
2465    break;
2466  case 64:
2467    Opcode = Opcodes[1];
2468    break;
2469  default:
2470    llvm_unreachable("bad vector element size in SelectMVE_WB");
2471  }
2472
2473  Ops.push_back(N->getOperand(2)); // vector of base addresses
2474
2475  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2476  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2477
2478  if (Predicated)
2479    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2480  else
2481    AddEmptyMVEPredicateToOps(Ops, Loc);
2482
2483  Ops.push_back(N->getOperand(0)); // chain
2484
2485  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2486}
2487
2488void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2489                                          bool Immediate,
2490                                          bool HasSaturationOperand) {
2491  SDLoc Loc(N);
2492  SmallVector<SDValue, 8> Ops;
2493
2494  // Two 32-bit halves of the value to be shifted
2495  Ops.push_back(N->getOperand(1));
2496  Ops.push_back(N->getOperand(2));
2497
2498  // The shift count
2499  if (Immediate) {
2500    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2501    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2502  } else {
2503    Ops.push_back(N->getOperand(3));
2504  }
2505
2506  // The immediate saturation operand, if any
2507  if (HasSaturationOperand) {
2508    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2509    int SatBit = (SatOp == 64 ? 0 : 1);
2510    Ops.push_back(getI32Imm(SatBit, Loc));
2511  }
2512
2513  // MVE scalar shifts are IT-predicable, so include the standard
2514  // predicate arguments.
2515  Ops.push_back(getAL(CurDAG, Loc));
2516  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2517
2518  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2519}
2520
2521void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2522                                        uint16_t OpcodeWithNoCarry,
2523                                        bool Add, bool Predicated) {
2524  SDLoc Loc(N);
2525  SmallVector<SDValue, 8> Ops;
2526  uint16_t Opcode;
2527
2528  unsigned FirstInputOp = Predicated ? 2 : 1;
2529
2530  // Two input vectors and the input carry flag
2531  Ops.push_back(N->getOperand(FirstInputOp));
2532  Ops.push_back(N->getOperand(FirstInputOp + 1));
2533  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2534  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2535  uint32_t CarryMask = 1 << 29;
2536  uint32_t CarryExpected = Add ? 0 : CarryMask;
2537  if (CarryInConstant &&
2538      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2539    Opcode = OpcodeWithNoCarry;
2540  } else {
2541    Ops.push_back(CarryIn);
2542    Opcode = OpcodeWithCarry;
2543  }
2544
2545  if (Predicated)
2546    AddMVEPredicateToOps(Ops, Loc,
2547                         N->getOperand(FirstInputOp + 3),  // predicate
2548                         N->getOperand(FirstInputOp - 1)); // inactive
2549  else
2550    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2551
2552  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2553}
2554
2555static bool SDValueToConstBool(SDValue SDVal) {
2556  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2557  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2558  uint64_t Value = SDValConstant->getZExtValue();
2559  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2560  return Value;
2561}
2562
/// Shared selection logic for the MVE VMLLDAV and VRMLLDAVH families.
/// Operands 1-3 are constant flags selecting the unsigned / subtracting /
/// exchanging variants, operands 4-5 carry the 64-bit input accumulator as
/// two 32-bit halves, and operands 6-7 are the vector inputs. The opcode
/// tables are laid out so that, relative to the base entry, the subtract,
/// exchange and accumulate variants live at offsets of 4*Stride, 2*Stride
/// and Stride respectively; \p TySize indexes the element size within each
/// group of \p Stride entries.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  // True when operand OpNo is a constant zero.
  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the opcode table to the entry for this combination of variants.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
2616
2617void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2618                                        const uint16_t *OpcodesS,
2619                                        const uint16_t *OpcodesU) {
2620  EVT VecTy = N->getOperand(6).getValueType();
2621  size_t SizeIndex;
2622  switch (VecTy.getVectorElementType().getSizeInBits()) {
2623  case 16:
2624    SizeIndex = 0;
2625    break;
2626  case 32:
2627    SizeIndex = 1;
2628    break;
2629  default:
2630    llvm_unreachable("bad vector element size");
2631  }
2632
2633  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2634}
2635
2636void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2637                                          const uint16_t *OpcodesS,
2638                                          const uint16_t *OpcodesU) {
2639  assert(
2640      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2641          32 &&
2642      "bad vector element size");
2643  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2644}
2645
2646void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2647                                    const uint16_t *const *Opcodes) {
2648  EVT VT = N->getValueType(0);
2649  SDLoc Loc(N);
2650
2651  const uint16_t *OurOpcodes;
2652  switch (VT.getVectorElementType().getSizeInBits()) {
2653  case 8:
2654    OurOpcodes = Opcodes[0];
2655    break;
2656  case 16:
2657    OurOpcodes = Opcodes[1];
2658    break;
2659  case 32:
2660    OurOpcodes = Opcodes[2];
2661    break;
2662  default:
2663    llvm_unreachable("bad vector element size in SelectMVE_VLD");
2664  }
2665
2666  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2667  EVT ResultTys[] = {DataTy, MVT::Other};
2668
2669  auto Data = SDValue(
2670      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2671  SDValue Chain = N->getOperand(0);
2672  for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
2673    SDValue Ops[] = {Data, N->getOperand(2), Chain};
2674    auto LoadInst =
2675        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2676    Data = SDValue(LoadInst, 0);
2677    Chain = SDValue(LoadInst, 1);
2678  }
2679
2680  for (unsigned i = 0; i < NumVecs; i++)
2681    ReplaceUses(SDValue(N, i),
2682                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
2683  ReplaceUses(SDValue(N, NumVecs), Chain);
2684  CurDAG->RemoveDeadNode(N);
2685}
2686
/// Select a NEON load-and-duplicate (VLD1DUP..VLD4DUP), optionally with a
/// post-increment update. \p IsIntrinsic tells us where the address operand
/// lives, \p NumVecs is the number of destination vectors (1-4), and the
/// opcode tables are indexed by element size: \p DOpcodes for 64-bit
/// vectors, \p QOpcodes0 / \p QOpcodes1 for the one- or two-instruction
/// 128-bit forms.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to something encodable: no larger than the total
  // access size, at least 8 bits, and a power of two; 0 means "unaligned".
  // The NumVecs == 3 forms never encode an alignment.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // All loaded vectors come back as one wide i64-element tuple; the
  // 3-vector case is rounded up to 4 registers.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SDNode *VLdDup;
  if (is64BitVector || NumVecs == 1) {
    // Single-instruction case.
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes0[OpcodeIndex];
    if (isUpdating) {
      // fixed-stride update instructions don't have an explicit writeback
      // operand. It's implicit in the opcode itself.
      SDValue Inc = N->getOperand(2);
      bool IsImmUpdate =
          isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
      if (NumVecs <= 2 && !IsImmUpdate)
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      if (!IsImmUpdate)
        Ops.push_back(Inc);
      // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
      else if (NumVecs > 2)
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else if (NumVecs == 2) {
    // Two-instruction 128-bit VLD2DUP: the second instruction uses the
    // same address and is chained after the first.
    const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  } else {
    // Two-instruction 128-bit VLD3/4DUP: here the second instruction also
    // takes the first one's partial tuple (seeded with IMPLICIT_DEF) as an
    // extra input.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
                                          dl, ResTys, OpsA);

    SDValue SuperReg = SDValue(VLdA, 0);
    Chain = SDValue(VLdA, 1);
    const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
    VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // The chain result follows the vectors; the write-back value (updating
  // forms only) follows the chain.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
2818
/// Try to select N as an ARMv6T2 bitfield extract ([SU]BFX) or, when the
/// field reaches the top bit, a plain right shift. Four shapes are
/// recognised: and(srl X, c), srl(shl X, c), srl(and X, mask), and
/// sign_extend_inreg of a right shift. Returns true if N was replaced.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field extends to the top bit, a plain shift suffices.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB would mean the field starts below bit 0; give up.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, sign_extend_inreg of a right shift becomes a signed extract.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2954
2955/// Target-specific DAG combining for ISD::XOR.
2956/// Target-independent combining lowers SELECT_CC nodes of the form
2957/// select_cc setg[ge] X,  0,  X, -X
2958/// select_cc setgt    X, -1,  X, -X
2959/// select_cc setl[te] X,  0, -X,  X
2960/// select_cc setlt    X,  1, -X,  X
2961/// which represent Integer ABS into:
2962/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2963/// ARM instruction selection detects the latter and matches it to
2964/// ARM::ABS or ARM::t2ABS machine node.
2965bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2966  SDValue XORSrc0 = N->getOperand(0);
2967  SDValue XORSrc1 = N->getOperand(1);
2968  EVT VT = N->getValueType(0);
2969
2970  if (Subtarget->isThumb1Only())
2971    return false;
2972
2973  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2974    return false;
2975
2976  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2977  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2978  SDValue SRASrc0 = XORSrc1.getOperand(0);
2979  SDValue SRASrc1 = XORSrc1.getOperand(1);
2980  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2981  EVT XType = SRASrc0.getValueType();
2982  unsigned Size = XType.getSizeInBits() - 1;
2983
2984  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2985      XType.isInteger() && SRAConstant != nullptr &&
2986      Size == SRAConstant->getZExtValue()) {
2987    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2988    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2989    return true;
2990  }
2991
2992  return false;
2993}
2994
2995/// We've got special pseudo-instructions for these
2996void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2997  unsigned Opcode;
2998  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2999  if (MemTy == MVT::i8)
3000    Opcode = ARM::CMP_SWAP_8;
3001  else if (MemTy == MVT::i16)
3002    Opcode = ARM::CMP_SWAP_16;
3003  else if (MemTy == MVT::i32)
3004    Opcode = ARM::CMP_SWAP_32;
3005  else
3006    llvm_unreachable("Unknown AtomicCmpSwap type");
3007
3008  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3009                   N->getOperand(0)};
3010  SDNode *CmpSwap = CurDAG->getMachineNode(
3011      Opcode, SDLoc(N),
3012      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3013
3014  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3015  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3016
3017  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3018  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3019  CurDAG->RemoveDeadNode(N);
3020}
3021
3022static Optional<std::pair<unsigned, unsigned>>
3023getContiguousRangeOfSetBits(const APInt &A) {
3024  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3025  unsigned LastOne = A.countTrailingZeros();
3026  if (A.countPopulation() != (FirstOne - LastOne + 1))
3027    return Optional<std::pair<unsigned,unsigned>>();
3028  return std::make_pair(FirstOne, LastOne);
3029}
3030
/// For Thumb targets, try to replace the AND feeding a CMPZ with one or two
/// flag-setting shifts (LSLS/LSRS) when the AND mask is a contiguous run of
/// bits. When the mask is a single bit we instead shift it into the sign
/// bit and set \p SwitchEQNEToPLMI so the caller rewrites EQ/NE condition
/// codes into PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  // The comparison must be against zero and the LHS must be an AND with a
  // constant mask whose set bits are contiguous.
  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting shift; in Thumb2 this is the shift with an S-bit
  // operand layout, in Thumb1 the shift implicitly defines CPSR.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}
3102
3103void ARMDAGToDAGISel::Select(SDNode *N) {
3104  SDLoc dl(N);
3105
3106  if (N->isMachineOpcode()) {
3107    N->setNodeId(-1);
3108    return;   // Already selected.
3109  }
3110
3111  switch (N->getOpcode()) {
3112  default: break;
3113  case ISD::STORE: {
3114    // For Thumb1, match an sp-relative store in C++. This is a little
3115    // unfortunate, but I don't think I can make the chain check work
3116    // otherwise.  (The chain of the store has to be the same as the chain
3117    // of the CopyFromReg, or else we can't replace the CopyFromReg with
3118    // a direct reference to "SP".)
3119    //
3120    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3121    // a different addressing mode from other four-byte stores.
3122    //
3123    // This pattern usually comes up with call arguments.
3124    StoreSDNode *ST = cast<StoreSDNode>(N);
3125    SDValue Ptr = ST->getBasePtr();
3126    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3127      int RHSC = 0;
3128      if (Ptr.getOpcode() == ISD::ADD &&
3129          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3130        Ptr = Ptr.getOperand(0);
3131
3132      if (Ptr.getOpcode() == ISD::CopyFromReg &&
3133          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3134          Ptr.getOperand(0) == ST->getChain()) {
3135        SDValue Ops[] = {ST->getValue(),
3136                         CurDAG->getRegister(ARM::SP, MVT::i32),
3137                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3138                         getAL(CurDAG, dl),
3139                         CurDAG->getRegister(0, MVT::i32),
3140                         ST->getChain()};
3141        MachineSDNode *ResNode =
3142            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3143        MachineMemOperand *MemOp = ST->getMemOperand();
3144        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3145        ReplaceNode(N, ResNode);
3146        return;
3147      }
3148    }
3149    break;
3150  }
  case ISD::WRITE_REGISTER:
    // Custom C++ selection for write_register nodes; if not handled here,
    // fall through to the autogenerated matcher.
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    // Custom C++ selection for read_register nodes; same fallback scheme.
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    // Custom handling of inline-assembly nodes (both the plain and the
    // branching form); fall through on failure.
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
3170  case ISD::Constant: {
3171    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3172    // If we can't materialize the constant we need to use a literal pool
3173    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3174      SDValue CPIdx = CurDAG->getTargetConstantPool(
3175          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3176          TLI->getPointerTy(CurDAG->getDataLayout()));
3177
3178      SDNode *ResNode;
3179      if (Subtarget->isThumb()) {
3180        SDValue Ops[] = {
3181          CPIdx,
3182          getAL(CurDAG, dl),
3183          CurDAG->getRegister(0, MVT::i32),
3184          CurDAG->getEntryNode()
3185        };
3186        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3187                                         Ops);
3188      } else {
3189        SDValue Ops[] = {
3190          CPIdx,
3191          CurDAG->getTargetConstant(0, dl, MVT::i32),
3192          getAL(CurDAG, dl),
3193          CurDAG->getRegister(0, MVT::i32),
3194          CurDAG->getEntryNode()
3195        };
3196        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3197                                         Ops);
3198      }
3199      // Annotate the Node with memory operand information so that MachineInstr
3200      // queries work properly. This e.g. gives the register allocation the
3201      // required information for rematerialization.
3202      MachineFunction& MF = CurDAG->getMachineFunction();
3203      MachineMemOperand *MemOp =
3204          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3205                                  MachineMemOperand::MOLoad, 4, 4);
3206
3207      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3208
3209      ReplaceNode(N, ResNode);
3210      return;
3211    }
3212
3213    // Other cases are autogenerated.
3214    break;
3215  }
3216  case ISD::FrameIndex: {
3217    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3218    int FI = cast<FrameIndexSDNode>(N)->getIndex();
3219    SDValue TFI = CurDAG->getTargetFrameIndex(
3220        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3221    if (Subtarget->isThumb1Only()) {
3222      // Set the alignment of the frame object to 4, to avoid having to generate
3223      // more than one ADD
3224      MachineFrameInfo &MFI = MF->getFrameInfo();
3225      if (MFI.getObjectAlignment(FI) < 4)
3226        MFI.setObjectAlignment(FI, 4);
3227      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3228                           CurDAG->getTargetConstant(0, dl, MVT::i32));
3229      return;
3230    } else {
3231      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3232                      ARM::t2ADDri : ARM::ADDri);
3233      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3234                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3235                        CurDAG->getRegister(0, MVT::i32) };
3236      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3237      return;
3238    }
3239  }
  case ISD::SRL:
    // A logical shift right may be selectable as an unsigned bitfield
    // extract (UBFX) on v6T2+ targets.
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    // Sign-extend-in-register and arithmetic shift right may be selectable
    // as a signed bitfield extract (SBFX) on v6T2+ targets.
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::MUL:
    // Strength-reduce multiplication by (2^n + 1) or (2^n - 1) into an
    // ADD/RSB with a shifted-register operand. The shifted-operand forms
    // used below (t2ADDrs/ADDrsi, t2RSBrs/RSBrsi) are Thumb2/ARM encodings,
    // so skip this entirely on Thumb1-only targets.
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        // x * (2^n + 1)  ==  x + (x << n)  ->  ADD x, x, lsl #n
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          // The ARM encoding takes an extra shift-amount register operand
          // (Reg0 here, meaning "immediate shift").
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        // x * (2^n - 1)  ==  (x << n) - x  ->  RSB x, x, lsl #n
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
3295  case ISD::AND: {
3296    // Check for unsigned bitfield extract
3297    if (tryV6T2BitfieldExtractOp(N, false))
3298      return;
3299
3300    // If an immediate is used in an AND node, it is possible that the immediate
3301    // can be more optimally materialized when negated. If this is the case we
3302    // can negate the immediate and use a BIC instead.
3303    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3304    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3305      uint32_t Imm = (uint32_t) N1C->getZExtValue();
3306
3307      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3308      // immediate can be negated and fit in the immediate operand of
3309      // a t2BIC, don't do any manual transform here as this can be
3310      // handled by the generic ISel machinery.
3311      bool PreferImmediateEncoding =
3312        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3313      if (!PreferImmediateEncoding &&
3314          ConstantMaterializationCost(Imm, Subtarget) >
3315              ConstantMaterializationCost(~Imm, Subtarget)) {
3316        // The current immediate costs more to materialize than a negated
3317        // immediate, so negate the immediate and use a BIC.
3318        SDValue NewImm =
3319          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3320        // If the new constant didn't exist before, reposition it in the topological
3321        // ordering so it is just before N. Otherwise, don't touch its location.
3322        if (NewImm->getNodeId() == -1)
3323          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3324
3325        if (!Subtarget->hasThumb2()) {
3326          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3327                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
3328                           CurDAG->getRegister(0, MVT::i32)};
3329          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3330          return;
3331        } else {
3332          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3333                           CurDAG->getRegister(0, MVT::i32),
3334                           CurDAG->getRegister(0, MVT::i32)};
3335          ReplaceNode(N,
3336                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3337          return;
3338        }
3339      }
3340    }
3341
3342    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3343    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3344    // are entirely contributed by c2 and lower 16-bits are entirely contributed
3345    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3346    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3347    EVT VT = N->getValueType(0);
3348    if (VT != MVT::i32)
3349      break;
3350    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3351      ? ARM::t2MOVTi16
3352      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3353    if (!Opc)
3354      break;
3355    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3356    N1C = dyn_cast<ConstantSDNode>(N1);
3357    if (!N1C)
3358      break;
3359    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3360      SDValue N2 = N0.getOperand(1);
3361      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3362      if (!N2C)
3363        break;
3364      unsigned N1CVal = N1C->getZExtValue();
3365      unsigned N2CVal = N2C->getZExtValue();
3366      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3367          (N1CVal & 0xffffU) == 0xffffU &&
3368          (N2CVal & 0xffffU) == 0x0U) {
3369        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3370                                                  dl, MVT::i32);
3371        SDValue Ops[] = { N0.getOperand(0), Imm16,
3372                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3373        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3374        return;
3375      }
3376    }
3377
3378    break;
3379  }
  case ARMISD::UMAAL: {
    // Unsigned multiply-accumulate-accumulate long: four i32 inputs produce
    // a 64-bit result as two i32 halves. Append the AL (always) predicate
    // operands expected by the machine instruction.
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
3389  case ARMISD::UMLAL:{
3390    if (Subtarget->isThumb()) {
3391      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3392                        N->getOperand(3), getAL(CurDAG, dl),
3393                        CurDAG->getRegister(0, MVT::i32)};
3394      ReplaceNode(
3395          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3396      return;
3397    }else{
3398      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3399                        N->getOperand(3), getAL(CurDAG, dl),
3400                        CurDAG->getRegister(0, MVT::i32),
3401                        CurDAG->getRegister(0, MVT::i32) };
3402      ReplaceNode(N, CurDAG->getMachineNode(
3403                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3404                         MVT::i32, MVT::i32, Ops));
3405      return;
3406    }
3407  }
3408  case ARMISD::SMLAL:{
3409    if (Subtarget->isThumb()) {
3410      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3411                        N->getOperand(3), getAL(CurDAG, dl),
3412                        CurDAG->getRegister(0, MVT::i32)};
3413      ReplaceNode(
3414          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3415      return;
3416    }else{
3417      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3418                        N->getOperand(3), getAL(CurDAG, dl),
3419                        CurDAG->getRegister(0, MVT::i32),
3420                        CurDAG->getRegister(0, MVT::i32) };
3421      ReplaceNode(N, CurDAG->getMachineNode(
3422                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3423                         MVT::i32, MVT::i32, Ops));
3424      return;
3425    }
3426  }
3427  case ARMISD::SUBE: {
3428    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3429      break;
3430    // Look for a pattern to match SMMLS
3431    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3432    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3433        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3434        !SDValue(N, 1).use_empty())
3435      break;
3436
3437    if (Subtarget->isThumb())
3438      assert(Subtarget->hasThumb2() &&
3439             "This pattern should not be generated for Thumb");
3440
3441    SDValue SmulLoHi = N->getOperand(1);
3442    SDValue Subc = N->getOperand(2);
3443    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3444
3445    if (!Zero || Zero->getZExtValue() != 0 ||
3446        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3447        N->getOperand(1) != SmulLoHi.getValue(1) ||
3448        N->getOperand(2) != Subc.getValue(1))
3449      break;
3450
3451    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3452    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3453                      N->getOperand(0), getAL(CurDAG, dl),
3454                      CurDAG->getRegister(0, MVT::i32) };
3455    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3456    return;
3457  }
  case ISD::LOAD: {
    // Try the custom indexed-load selectors in order of specificity:
    // MVE first, then Thumb2 / Thumb1 / ARM. Each returns true if it
    // selected the node.
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    // Masked loads only get custom (indexed) selection when MVE is present.
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  case ARMISD::WLS:
  case ARMISD::LE: {
    // Low-overhead-loop branches (while-loop start / loop end). The DAG
    // node is (Chain, Op1, Op2); the machine node takes the chain last.
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = N->getOpcode() == ARMISD::WLS ?
      ARM::t2WhileLoopStart : ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    // Loop-counter decrement: t2LoopDec yields the decremented counter
    // (i32) plus a chain; operands are reordered chain-last as above.
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
      CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                             CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
3500  case ARMISD::BRCOND: {
3501    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3502    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3503    // Pattern complexity = 6  cost = 1  size = 0
3504
3505    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3506    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3507    // Pattern complexity = 6  cost = 1  size = 0
3508
3509    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3510    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3511    // Pattern complexity = 6  cost = 1  size = 0
3512
3513    unsigned Opc = Subtarget->isThumb() ?
3514      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3515    SDValue Chain = N->getOperand(0);
3516    SDValue N1 = N->getOperand(1);
3517    SDValue N2 = N->getOperand(2);
3518    SDValue N3 = N->getOperand(3);
3519    SDValue InFlag = N->getOperand(4);
3520    assert(N1.getOpcode() == ISD::BasicBlock);
3521    assert(N2.getOpcode() == ISD::Constant);
3522    assert(N3.getOpcode() == ISD::Register);
3523
3524    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3525
3526    if (InFlag.getOpcode() == ARMISD::CMPZ) {
3527      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3528        SDValue Int = InFlag.getOperand(0);
3529        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3530
3531        // Handle low-overhead loops.
3532        if (ID == Intrinsic::loop_decrement_reg) {
3533          SDValue Elements = Int.getOperand(2);
3534          SDValue Size = CurDAG->getTargetConstant(
3535            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3536                                 MVT::i32);
3537
3538          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3539          SDNode *LoopDec =
3540            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3541                                   CurDAG->getVTList(MVT::i32, MVT::Other),
3542                                   Args);
3543          ReplaceUses(Int.getNode(), LoopDec);
3544
3545          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3546          SDNode *LoopEnd =
3547            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3548
3549          ReplaceUses(N, LoopEnd);
3550          CurDAG->RemoveDeadNode(N);
3551          CurDAG->RemoveDeadNode(InFlag.getNode());
3552          CurDAG->RemoveDeadNode(Int.getNode());
3553          return;
3554        }
3555      }
3556
3557      bool SwitchEQNEToPLMI;
3558      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3559      InFlag = N->getOperand(4);
3560
3561      if (SwitchEQNEToPLMI) {
3562        switch ((ARMCC::CondCodes)CC) {
3563        default: llvm_unreachable("CMPZ must be either NE or EQ!");
3564        case ARMCC::NE:
3565          CC = (unsigned)ARMCC::MI;
3566          break;
3567        case ARMCC::EQ:
3568          CC = (unsigned)ARMCC::PL;
3569          break;
3570        }
3571      }
3572    }
3573
3574    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3575    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3576    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3577                                             MVT::Glue, Ops);
3578    Chain = SDValue(ResNode, 0);
3579    if (N->getNumValues() == 2) {
3580      InFlag = SDValue(ResNode, 1);
3581      ReplaceUses(SDValue(N, 1), InFlag);
3582    }
3583    ReplaceUses(SDValue(N, 0),
3584                SDValue(Chain.getNode(), Chain.getResNo()));
3585    CurDAG->RemoveDeadNode(N);
3586    return;
3587  }
3588
3589  case ARMISD::CMPZ: {
3590    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3591    //   This allows us to avoid materializing the expensive negative constant.
3592    //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3593    //   for its glue output.
3594    SDValue X = N->getOperand(0);
3595    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3596    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3597      int64_t Addend = -C->getSExtValue();
3598
3599      SDNode *Add = nullptr;
3600      // ADDS can be better than CMN if the immediate fits in a
3601      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3602      // Outside that range we can just use a CMN which is 32-bit but has a
3603      // 12-bit immediate range.
3604      if (Addend < 1<<8) {
3605        if (Subtarget->isThumb2()) {
3606          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3607                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3608                            CurDAG->getRegister(0, MVT::i32) };
3609          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3610        } else {
3611          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3612          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3613                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3614                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3615          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3616        }
3617      }
3618      if (Add) {
3619        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3620        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3621      }
3622    }
3623    // Other cases are autogenerated.
3624    break;
3625  }
3626
3627  case ARMISD::CMOV: {
3628    SDValue InFlag = N->getOperand(4);
3629
3630    if (InFlag.getOpcode() == ARMISD::CMPZ) {
3631      bool SwitchEQNEToPLMI;
3632      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3633
3634      if (SwitchEQNEToPLMI) {
3635        SDValue ARMcc = N->getOperand(2);
3636        ARMCC::CondCodes CC =
3637          (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3638
3639        switch (CC) {
3640        default: llvm_unreachable("CMPZ must be either NE or EQ!");
3641        case ARMCC::NE:
3642          CC = ARMCC::MI;
3643          break;
3644        case ARMCC::EQ:
3645          CC = ARMCC::PL;
3646          break;
3647        }
3648        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3649        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3650                         N->getOperand(3), N->getOperand(4)};
3651        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3652      }
3653
3654    }
3655    // Other cases are autogenerated.
3656    break;
3657  }
3658
  case ARMISD::VZIP: {
    // NEON vector zip/interleave: pick the VZIP opcode for the element
    // size and register width, then emit it with AL predicate operands.
    // The instruction produces two results of the same vector type.
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;   // unsupported vector type: leave the node unselected
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
3682  case ARMISD::VUZP: {
3683    unsigned Opc = 0;
3684    EVT VT = N->getValueType(0);
3685    switch (VT.getSimpleVT().SimpleTy) {
3686    default: return;
3687    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3688    case MVT::v4f16:
3689    case MVT::v4i16: Opc = ARM::VUZPd16; break;
3690    case MVT::v2f32:
3691    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3692    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3693    case MVT::v16i8: Opc = ARM::VUZPq8; break;
3694    case MVT::v8f16:
3695    case MVT::v8i16: Opc = ARM::VUZPq16; break;
3696    case MVT::v4f32:
3697    case MVT::v4i32: Opc = ARM::VUZPq32; break;
3698    }
3699    SDValue Pred = getAL(CurDAG, dl);
3700    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3701    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3702    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3703    return;
3704  }
3705  case ARMISD::VTRN: {
3706    unsigned Opc = 0;
3707    EVT VT = N->getValueType(0);
3708    switch (VT.getSimpleVT().SimpleTy) {
3709    default: return;
3710    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3711    case MVT::v4f16:
3712    case MVT::v4i16: Opc = ARM::VTRNd16; break;
3713    case MVT::v2f32:
3714    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3715    case MVT::v16i8: Opc = ARM::VTRNq8; break;
3716    case MVT::v8f16:
3717    case MVT::v8i16: Opc = ARM::VTRNq16; break;
3718    case MVT::v4f32:
3719    case MVT::v4i32: Opc = ARM::VTRNq32; break;
3720    }
3721    SDValue Pred = getAL(CurDAG, dl);
3722    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3723    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3724    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3725    return;
3726  }
3727  case ARMISD::BUILD_VECTOR: {
3728    EVT VecVT = N->getValueType(0);
3729    EVT EltVT = VecVT.getVectorElementType();
3730    unsigned NumElts = VecVT.getVectorNumElements();
3731    if (EltVT == MVT::f64) {
3732      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3733      ReplaceNode(
3734          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3735      return;
3736    }
3737    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3738    if (NumElts == 2) {
3739      ReplaceNode(
3740          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3741      return;
3742    }
3743    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3744    ReplaceNode(N,
3745                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3746                                    N->getOperand(2), N->getOperand(3)));
3747    return;
3748  }
3749
3750  case ARMISD::VLD1DUP: {
3751    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3752                                         ARM::VLD1DUPd32 };
3753    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3754                                         ARM::VLD1DUPq32 };
3755    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3756    return;
3757  }
3758
3759  case ARMISD::VLD2DUP: {
3760    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3761                                        ARM::VLD2DUPd32 };
3762    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3763    return;
3764  }
3765
3766  case ARMISD::VLD3DUP: {
3767    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3768                                        ARM::VLD3DUPd16Pseudo,
3769                                        ARM::VLD3DUPd32Pseudo };
3770    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3771    return;
3772  }
3773
3774  case ARMISD::VLD4DUP: {
3775    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3776                                        ARM::VLD4DUPd16Pseudo,
3777                                        ARM::VLD4DUPd32Pseudo };
3778    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3779    return;
3780  }
3781
3782  case ARMISD::VLD1DUP_UPD: {
3783    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3784                                         ARM::VLD1DUPd16wb_fixed,
3785                                         ARM::VLD1DUPd32wb_fixed };
3786    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3787                                         ARM::VLD1DUPq16wb_fixed,
3788                                         ARM::VLD1DUPq32wb_fixed };
3789    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3790    return;
3791  }
3792
3793  case ARMISD::VLD2DUP_UPD: {
3794    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3795                                        ARM::VLD2DUPd16wb_fixed,
3796                                        ARM::VLD2DUPd32wb_fixed };
3797    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3798    return;
3799  }
3800
3801  case ARMISD::VLD3DUP_UPD: {
3802    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3803                                        ARM::VLD3DUPd16Pseudo_UPD,
3804                                        ARM::VLD3DUPd32Pseudo_UPD };
3805    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3806    return;
3807  }
3808
3809  case ARMISD::VLD4DUP_UPD: {
3810    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3811                                        ARM::VLD4DUPd16Pseudo_UPD,
3812                                        ARM::VLD4DUPd32Pseudo_UPD };
3813    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3814    return;
3815  }
3816
3817  case ARMISD::VLD1_UPD: {
3818    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3819                                         ARM::VLD1d16wb_fixed,
3820                                         ARM::VLD1d32wb_fixed,
3821                                         ARM::VLD1d64wb_fixed };
3822    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3823                                         ARM::VLD1q16wb_fixed,
3824                                         ARM::VLD1q32wb_fixed,
3825                                         ARM::VLD1q64wb_fixed };
3826    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3827    return;
3828  }
3829
  // NEON vld2 with address-register writeback ("_UPD").  Each opcode table
  // is indexed by element size (8/16/32/64 bits).  Note the 64-bit D slot
  // reuses a VLD1 q-register opcode: a 2-element structure of 64-bit
  // elements needs no de-interleaving.
  case ARMISD::VLD2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
                                         ARM::VLD2d16wb_fixed,
                                         ARM::VLD2d32wb_fixed,
                                         ARM::VLD1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
                                         ARM::VLD2q16PseudoWB_fixed,
                                         ARM::VLD2q32PseudoWB_fixed };
    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // NEON vld3 with writeback.  Q-register forms are split into two pseudo
  // instructions (QOpcodes0, then the "odd" QOpcodes1 variants); how the
  // two halves are combined is handled inside SelectVLD.
  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // NEON vld4 with writeback; same two-pseudo split as vld3 above.
  case ARMISD::VLD4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
                                         ARM::VLD4d16Pseudo_UPD,
                                         ARM::VLD4d32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                          ARM::VLD4q16Pseudo_UPD,
                                          ARM::VLD4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
                                          ARM::VLD4q16oddPseudo_UPD,
                                          ARM::VLD4q32oddPseudo_UPD };
    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // Single-lane vld2/vld3/vld4 with writeback.  The Q tables have no 8-bit
  // entry (the tables start at 16 bits), matching the instruction set.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
3901
  // NEON vst1 with address-register writeback; tables indexed by element
  // size (8/16/32/64 bits), mirroring the VLD*_UPD cases above.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // NEON vst2 with writeback; as with vld2, the 64-bit D slot is a plain
  // VST1 of a q register (no interleaving needed).
  case ARMISD::VST2_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
                                         ARM::VST2d16wb_fixed,
                                         ARM::VST2d32wb_fixed,
                                         ARM::VST1q64wb_fixed};
    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
                                         ARM::VST2q16PseudoWB_fixed,
                                         ARM::VST2q32PseudoWB_fixed };
    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // NEON vst3 with writeback; Q forms split into two pseudos (QOpcodes0
  // then the "odd" QOpcodes1 variants), combined inside SelectVST.
  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // NEON vst4 with writeback; same two-pseudo split as vst3 above.
  case ARMISD::VST4_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
                                         ARM::VST4d16Pseudo_UPD,
                                         ARM::VST4d32Pseudo_UPD,
                                         ARM::VST1d64QPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                          ARM::VST4q16Pseudo_UPD,
                                          ARM::VST4q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
                                          ARM::VST4q16oddPseudo_UPD,
                                          ARM::VST4q32oddPseudo_UPD };
    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  // Single-lane vst2/vst3/vst4 with writeback; no 8-bit Q entries, matching
  // the VLD*LN_UPD cases.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
3986
  // Chained intrinsics: operand 0 is the chain, operand 1 the intrinsic ID,
  // and the intrinsic's own arguments start at operand 2.
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // Read a coprocessor register pair into two i32 results (MRRC/MRRC2,
    // or the Thumb2 encodings t2MRRC/t2MRRC2).
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
      // instruction will always be '1111' but it is possible in assembly language to specify
      // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    // 64-bit load-exclusive / load-acquire-exclusive.  In Thumb the result
    // is two separate i32 values; in ARM mode it is a single GPRPair
    // (Untyped) from which the halves are extracted via EXTRACT_SUBREG.
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns a i64 value in {i32, i32}
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.  The chain result index depends on how many value
      // results the machine node produced (2 in Thumb, 1 in ARM mode).
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          // ARM mode: pull the low half (gsub_0) out of the GPRPair.
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          // ARM mode: pull the high half (gsub_1) out of the GPRPair.
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    // 64-bit store-exclusive / store-release-exclusive.  Thumb passes the
    // two halves separately; ARM mode packs them into a GPRPair first.
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double return a i32 value which is the return status
      // of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    // NEON vldN / vldNdup / vldNlane intrinsics (no writeback).  Opcode
    // tables are indexed by element size, as in the ARMISD::VLD*_UPD cases.
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // Load-and-duplicate: Q forms use Even/Odd pseudo pairs.
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // Single-lane loads (no 8-bit Q entries).
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    // NEON vstN / vstNlane intrinsics (no writeback), mirroring the loads.
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    // MVE pre-indexed gather-load with writeback; opcode table picks the
    // 32- vs 64-bit element form.
    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    // MVE interleaving loads: one opcode per de-interleave stage, with a
    // table per element size.
    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes);
      return;
    }
    }
    // Unhandled intrinsic: fall through to table-generated selection.
    break;
  }
4453
  // Chainless intrinsics: operand 0 is the intrinsic ID, arguments follow.
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // MVE 64-bit ("long") shifts.  NOTE(review): the two bool arguments
    // appear to select an immediate shift amount and an extra saturation
    // operand respectively -- confirm against SelectMVE_LongShift.
    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;
    case Intrinsic::arm_mve_lsll:
      SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
      return;
    case Intrinsic::arm_mve_asrl:
      SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
      return;

    // MVE add-with-carry (plain and carry-in-initialized variants).
    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;

    // MVE long multiply-accumulate reductions.  The S table also carries
    // the exchanging (x) and subtracting (VMLSLDAV) variants, which only
    // exist for signed inputs.
    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    // MVE rounding long multiply-accumulate reductions (32-bit only).
    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }
    }
    // Unhandled intrinsic: fall through to table-generated selection.
    break;
  }
4530
4531  case ISD::ATOMIC_CMP_SWAP:
4532    SelectCMP_SWAP(N);
4533    return;
4534  }
4535
4536  SelectCode(N);
4537}
4538
// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64 bit); inspect the fields of the string,
// obtain the integer operands from them, and append those operands to the
// provided vector.
4544static void getIntOperandsFromRegisterString(StringRef RegString,
4545                                             SelectionDAG *CurDAG,
4546                                             const SDLoc &DL,
4547                                             std::vector<SDValue> &Ops) {
4548  SmallVector<StringRef, 5> Fields;
4549  RegString.split(Fields, ':');
4550
4551  if (Fields.size() > 1) {
4552    bool AllIntFields = true;
4553
4554    for (StringRef Field : Fields) {
4555      // Need to trim out leading 'cp' characters and get the integer field.
4556      unsigned IntField;
4557      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4558      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4559    }
4560
4561    assert(AllIntFields &&
4562            "Unexpected non-integer value in special register string.");
4563  }
4564}
4565
4566// Maps a Banked Register string to its mask value. The mask value returned is
4567// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4568// mask operand, which expresses which register is to be used, e.g. r8, and in
4569// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4570// was invalid.
4571static inline int getBankedRegisterMask(StringRef RegString) {
4572  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4573  if (!TheReg)
4574     return -1;
4575  return TheReg->Encoding;
4576}
4577
4578// The flags here are common to those allowed for apsr in the A class cores and
4579// those allowed for the special registers in the M class cores. Returns a
4580// value representing which flags were present, -1 if invalid.
4581static inline int getMClassFlagsMask(StringRef Flags) {
4582  return StringSwitch<int>(Flags)
4583          .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4584                         // correct when flags are not permitted
4585          .Case("g", 0x1)
4586          .Case("nzcvq", 0x2)
4587          .Case("nzcvqg", 0x3)
4588          .Default(-1);
4589}
4590
4591// Maps MClass special registers string to its value for use in the
4592// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4593// Returns -1 to signify that the string was invalid.
4594static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4595  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4596  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4597  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4598    return -1;
4599  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4600}
4601
4602static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4603  // The mask operand contains the special register (R Bit) in bit 4, whether
4604  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4605  // bits 3-0 contains the fields to be accessed in the special register, set by
4606  // the flags provided with the register.
4607  int Mask = 0;
4608  if (Reg == "apsr") {
4609    // The flags permitted for apsr are the same flags that are allowed in
4610    // M class registers. We get the flag value and then shift the flags into
4611    // the correct place to combine with the mask.
4612    Mask = getMClassFlagsMask(Flags);
4613    if (Mask == -1)
4614      return -1;
4615    return Mask << 2;
4616  }
4617
4618  if (Reg != "cpsr" && Reg != "spsr") {
4619    return -1;
4620  }
4621
4622  // This is the same as if the flags were "fc"
4623  if (Flags.empty() || Flags == "all")
4624    return Mask | 0x9;
4625
4626  // Inspect the supplied flags string and set the bits in the mask for
4627  // the relevant and valid flags allowed for cpsr and spsr.
4628  for (char Flag : Flags) {
4629    int FlagVal;
4630    switch (Flag) {
4631      case 'c':
4632        FlagVal = 0x1;
4633        break;
4634      case 'x':
4635        FlagVal = 0x2;
4636        break;
4637      case 's':
4638        FlagVal = 0x4;
4639        break;
4640      case 'f':
4641        FlagVal = 0x8;
4642        break;
4643      default:
4644        FlagVal = 0;
4645    }
4646
4647    // This avoids allowing strings where the same flag bit appears twice.
4648    if (!FlagVal || (Mask & FlagVal))
4649      return -1;
4650    Mask |= FlagVal;
4651  }
4652
4653  // If the register is spsr then we need to set the R bit.
4654  if (Reg == "spsr")
4655    Mask |= 0x10;
4656
4657  return Mask;
4658}
4659
4660// Lower the read_register intrinsic to ARM specific DAG nodes
4661// using the supplied metadata string to select the instruction node to use
4662// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 0 of N is the incoming chain; operand 1 is metadata whose first
  // operand is the string naming the register to read.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      // Five fields (cp:opc1:CRn:CRm:opc2) -> single 32-bit result.
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm) -> two 32-bit results forming 64 bits.
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the AL predicate, the (absent) predicate register, and the
    // incoming chain, then replace N with the machine node.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Register names are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with an encoded
  // mask operand selecting the register and mode.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // The subtarget must actually have the VFP registers being read.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Nothing matched; fall back to generic selection (which will report an
  // error for an unrecognized register string).
  return false;
}
4774
4775// Lower the write_register intrinsic to ARM specific DAG nodes
4776// using the supplied metadata string to select the instruction node to use
4777// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 0 of N is the incoming chain; operand 1 is metadata naming the
  // register; operand 2 (and 3 for 64-bit writes) is the value to write.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      // Five fields (cp:opc1:CRn:CRm:opc2): insert the single 32-bit write
      // value after the coprocessor and opc1 operands.
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm): insert both halves of the 64-bit value.
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the AL predicate, the (absent) predicate register, and the
    // incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Register names are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();
  // Banked registers (e.g. "r8_usr") lower to MSRbanked with an encoded
  // mask operand selecting the register and mode.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // The subtarget must actually have the VFP register being written.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a name like "cpsr_fc" into the register ("cpsr") and its flag
  // suffix ("fc") for the A/R-class mask computation below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Nothing matched; fall back to generic selection.
  return false;
}
4878
// Rewrite the operand list of an inline-asm node so that each two-GPR "r"
// operand pair is replaced with a single GPRPair virtual register, inserting
// copies to/from the original GPRs around the asm. Returns true (and replaces
// N) if any operand was rewritten; false leaves N untouched.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // Tracks, per register operand, whether it was rewritten to a GPRPair;
  // consulted later for uses tied to rewritten defs.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (chain, asm string, etc.).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only flag-word constants describe operand groups; anything else is
    // copied through unchanged.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register defs/uses/early-clobbers are candidates for pairing.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Candidates must be exactly two GPRs (or tied to an operand we already
    // rewrote, in which case the reg class constraint is absent).
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rebuild the flag word: one register of GPRPair class (or, for a
      // tied use, the matching-operand encoding).
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline-asm node with the rewritten operand list.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
5042
5043
5044bool ARMDAGToDAGISel::
5045SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5046                             std::vector<SDValue> &OutOps) {
5047  switch(ConstraintID) {
5048  default:
5049    llvm_unreachable("Unexpected asm memory constraint");
5050  case InlineAsm::Constraint_m:
5051  case InlineAsm::Constraint_o:
5052  case InlineAsm::Constraint_Q:
5053  case InlineAsm::Constraint_Um:
5054  case InlineAsm::Constraint_Un:
5055  case InlineAsm::Constraint_Uq:
5056  case InlineAsm::Constraint_Us:
5057  case InlineAsm::Constraint_Ut:
5058  case InlineAsm::Constraint_Uv:
5059  case InlineAsm::Constraint_Uy:
5060    // Require the address to be in a register.  That is safe for all ARM
5061    // variants and it is hard to do anything much smarter without knowing
5062    // how the operand is used.
5063    OutOps.push_back(Op);
5064    return false;
5065  }
5066  return true;
5067}
5068
5069/// createARMISelDag - This pass converts a legalized DAG into a
5070/// ARM-specific DAG, ready for instruction scheduling.
5071///
5072FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5073                                     CodeGenOpt::Level OptLevel) {
5074  return new ARMDAGToDAGISel(TM, OptLevel);
5075}
5076