1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVMachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
21#include "llvm/Support/Alignment.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31static cl::opt<bool> UsePseudoMovImm(
32    "riscv-use-rematerializable-movimm", cl::Hidden,
33    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34             "constant materialization"),
35    cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#define GET_RISCVMaskedPseudosTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
49
50void RISCVDAGToDAGISel::PreprocessISelDAG() {
51  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53  bool MadeChange = false;
54  while (Position != CurDAG->allnodes_begin()) {
55    SDNode *N = &*--Position;
56    if (N->use_empty())
57      continue;
58
59    SDValue Result;
60    switch (N->getOpcode()) {
61    case ISD::SPLAT_VECTOR: {
62      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64      MVT VT = N->getSimpleValueType(0);
65      unsigned Opc =
66          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67      SDLoc DL(N);
68      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69      SDValue Src = N->getOperand(0);
70      if (VT.isInteger())
71        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72                              N->getOperand(0));
73      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74      break;
75    }
76    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78      // load. Done after lowering and combining so that we have a chance to
79      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
80      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81      MVT VT = N->getSimpleValueType(0);
82      SDValue Passthru = N->getOperand(0);
83      SDValue Lo = N->getOperand(1);
84      SDValue Hi = N->getOperand(2);
85      SDValue VL = N->getOperand(3);
86      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88             "Unexpected VTs!");
89      MachineFunction &MF = CurDAG->getMachineFunction();
90      SDLoc DL(N);
91
92      // Create temporary stack for each expanding node.
93      SDValue StackSlot =
94          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97
98      SDValue Chain = CurDAG->getEntryNode();
99      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101      SDValue OffsetSlot =
102          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104                            Align(8));
105
106      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109      SDValue IntID =
110          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111      SDValue Ops[] = {Chain,
112                       IntID,
113                       Passthru,
114                       StackSlot,
115                       CurDAG->getRegister(RISCV::X0, MVT::i64),
116                       VL};
117
118      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119                                           MVT::i64, MPI, Align(8),
120                                           MachineMemOperand::MOLoad);
121      break;
122    }
123    }
124
125    if (Result) {
126      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld:    ");
127      LLVM_DEBUG(N->dump(CurDAG));
128      LLVM_DEBUG(dbgs() << "\nNew: ");
129      LLVM_DEBUG(Result->dump(CurDAG));
130      LLVM_DEBUG(dbgs() << "\n");
131
132      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133      MadeChange = true;
134    }
135  }
136
137  if (MadeChange)
138    CurDAG->RemoveDeadNodes();
139}
140
141void RISCVDAGToDAGISel::PostprocessISelDAG() {
142  HandleSDNode Dummy(CurDAG->getRoot());
143  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145  bool MadeChange = false;
146  while (Position != CurDAG->allnodes_begin()) {
147    SDNode *N = &*--Position;
148    // Skip dead nodes and any non-machine opcodes.
149    if (N->use_empty() || !N->isMachineOpcode())
150      continue;
151
152    MadeChange |= doPeepholeSExtW(N);
153
154    // FIXME: This is here only because the VMerge transform doesn't
155    // know how to handle masked true inputs.  Once that has been moved
156    // to post-ISEL, this can be deleted as well.
157    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158  }
159
160  CurDAG->setRoot(Dummy.getValue());
161
162  MadeChange |= doPeepholeMergeVVMFold();
163
164  // After we're done with everything else, convert IMPLICIT_DEF
165  // passthru operands to NoRegister.  This is required to workaround
166  // an optimization deficiency in MachineCSE.  This really should
167  // be merged back into each of the patterns (i.e. there's no good
168  // reason not to go directly to NoReg), but is being done this way
169  // to allow easy backporting.
170  MadeChange |= doPeepholeNoRegPassThru();
171
172  if (MadeChange)
173    CurDAG->RemoveDeadNodes();
174}
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177                            RISCVMatInt::InstSeq &Seq) {
178  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179  for (const RISCVMatInt::Inst &Inst : Seq) {
180    SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
181    SDNode *Result = nullptr;
182    switch (Inst.getOpndKind()) {
183    case RISCVMatInt::Imm:
184      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
185      break;
186    case RISCVMatInt::RegX0:
187      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
188                                      CurDAG->getRegister(RISCV::X0, VT));
189      break;
190    case RISCVMatInt::RegReg:
191      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
192      break;
193    case RISCVMatInt::RegImm:
194      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
195      break;
196    }
197
198    // Only the first instruction has X0 as its source.
199    SrcReg = SDValue(Result, 0);
200  }
201
202  return SrcReg;
203}
204
205static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
206                         int64_t Imm, const RISCVSubtarget &Subtarget) {
207  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208
209  // Use a rematerializable pseudo instruction for short sequences if enabled.
210  if (Seq.size() == 2 && UsePseudoMovImm)
211    return SDValue(
212        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213                               CurDAG->getTargetConstant(Imm, DL, VT)),
214        0);
215
216  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
217  // worst an LUI+ADDIW. This will require an extra register, but avoids a
218  // constant pool.
219  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
220  // low and high 32 bits are the same and bit 31 and 63 are set.
221  if (Seq.size() > 3) {
222    unsigned ShiftAmt, AddOpc;
223    RISCVMatInt::InstSeq SeqLo =
224        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
225    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
226      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227
228      SDValue SLLI = SDValue(
229          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
230                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231          0);
232      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
233    }
234  }
235
236  // Otherwise, use the original sequence.
237  return selectImmSeq(CurDAG, DL, VT, Seq);
238}
239
240static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
241                           unsigned NF, RISCVII::VLMUL LMUL) {
242  static const unsigned M1TupleRegClassIDs[] = {
243      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
244      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
245      RISCV::VRN8M1RegClassID};
246  static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
247                                                RISCV::VRN3M2RegClassID,
248                                                RISCV::VRN4M2RegClassID};
249
250  assert(Regs.size() >= 2 && Regs.size() <= 8);
251
252  unsigned RegClassID;
253  unsigned SubReg0;
254  switch (LMUL) {
255  default:
256    llvm_unreachable("Invalid LMUL.");
257  case RISCVII::VLMUL::LMUL_F8:
258  case RISCVII::VLMUL::LMUL_F4:
259  case RISCVII::VLMUL::LMUL_F2:
260  case RISCVII::VLMUL::LMUL_1:
261    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
262                  "Unexpected subreg numbering");
263    SubReg0 = RISCV::sub_vrm1_0;
264    RegClassID = M1TupleRegClassIDs[NF - 2];
265    break;
266  case RISCVII::VLMUL::LMUL_2:
267    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
268                  "Unexpected subreg numbering");
269    SubReg0 = RISCV::sub_vrm2_0;
270    RegClassID = M2TupleRegClassIDs[NF - 2];
271    break;
272  case RISCVII::VLMUL::LMUL_4:
273    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
274                  "Unexpected subreg numbering");
275    SubReg0 = RISCV::sub_vrm4_0;
276    RegClassID = RISCV::VRN2M4RegClassID;
277    break;
278  }
279
280  SDLoc DL(Regs[0]);
281  SmallVector<SDValue, 8> Ops;
282
283  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
284
285  for (unsigned I = 0; I < Regs.size(); ++I) {
286    Ops.push_back(Regs[I]);
287    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
288  }
289  SDNode *N =
290      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
291  return SDValue(N, 0);
292}
293
294void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
295    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
296    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
297    bool IsLoad, MVT *IndexVT) {
298  SDValue Chain = Node->getOperand(0);
299  SDValue Glue;
300
301  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
302
303  if (IsStridedOrIndexed) {
304    Operands.push_back(Node->getOperand(CurOp++)); // Index.
305    if (IndexVT)
306      *IndexVT = Operands.back()->getSimpleValueType(0);
307  }
308
309  if (IsMasked) {
310    // Mask needs to be copied to V0.
311    SDValue Mask = Node->getOperand(CurOp++);
312    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
313    Glue = Chain.getValue(1);
314    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
315  }
316  SDValue VL;
317  selectVLOp(Node->getOperand(CurOp++), VL);
318  Operands.push_back(VL);
319
320  MVT XLenVT = Subtarget->getXLenVT();
321  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
322  Operands.push_back(SEWOp);
323
324  // At the IR layer, all the masked load intrinsics have policy operands,
325  // none of the others do.  All have passthru operands.  For our pseudos,
326  // all loads have policy operands.
327  if (IsLoad) {
328    uint64_t Policy = RISCVII::MASK_AGNOSTIC;
329    if (IsMasked)
330      Policy = Node->getConstantOperandVal(CurOp++);
331    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
332    Operands.push_back(PolicyOp);
333  }
334
335  Operands.push_back(Chain); // Chain.
336  if (Glue)
337    Operands.push_back(Glue);
338}
339
340void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
341                                    bool IsStrided) {
342  SDLoc DL(Node);
343  unsigned NF = Node->getNumValues() - 1;
344  MVT VT = Node->getSimpleValueType(0);
345  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
347
348  unsigned CurOp = 2;
349  SmallVector<SDValue, 8> Operands;
350
351  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
352                               Node->op_begin() + CurOp + NF);
353  SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
354  Operands.push_back(Merge);
355  CurOp += NF;
356
357  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
358                             Operands, /*IsLoad=*/true);
359
360  const RISCV::VLSEGPseudo *P =
361      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
362                            static_cast<unsigned>(LMUL));
363  MachineSDNode *Load =
364      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
365
366  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
367    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
368
369  SDValue SuperReg = SDValue(Load, 0);
370  for (unsigned I = 0; I < NF; ++I) {
371    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
372    ReplaceUses(SDValue(Node, I),
373                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
374  }
375
376  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
377  CurDAG->RemoveDeadNode(Node);
378}
379
380void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
381  SDLoc DL(Node);
382  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
383  MVT VT = Node->getSimpleValueType(0);
384  MVT XLenVT = Subtarget->getXLenVT();
385  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
387
388  unsigned CurOp = 2;
389  SmallVector<SDValue, 7> Operands;
390
391  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
392                               Node->op_begin() + CurOp + NF);
393  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
394  Operands.push_back(MaskedOff);
395  CurOp += NF;
396
397  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
398                             /*IsStridedOrIndexed*/ false, Operands,
399                             /*IsLoad=*/true);
400
401  const RISCV::VLSEGPseudo *P =
402      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
403                            Log2SEW, static_cast<unsigned>(LMUL));
404  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
405                                               XLenVT, MVT::Other, Operands);
406
407  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
408    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
409
410  SDValue SuperReg = SDValue(Load, 0);
411  for (unsigned I = 0; I < NF; ++I) {
412    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
413    ReplaceUses(SDValue(Node, I),
414                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
415  }
416
417  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
418  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
419  CurDAG->RemoveDeadNode(Node);
420}
421
422void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
423                                     bool IsOrdered) {
424  SDLoc DL(Node);
425  unsigned NF = Node->getNumValues() - 1;
426  MVT VT = Node->getSimpleValueType(0);
427  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
429
430  unsigned CurOp = 2;
431  SmallVector<SDValue, 8> Operands;
432
433  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
434                               Node->op_begin() + CurOp + NF);
435  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
436  Operands.push_back(MaskedOff);
437  CurOp += NF;
438
439  MVT IndexVT;
440  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
441                             /*IsStridedOrIndexed*/ true, Operands,
442                             /*IsLoad=*/true, &IndexVT);
443
444  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
445         "Element count mismatch");
446
447  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
448  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
449  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
450    report_fatal_error("The V extension does not support EEW=64 for index "
451                       "values when XLEN=32");
452  }
453  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
454      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
455      static_cast<unsigned>(IndexLMUL));
456  MachineSDNode *Load =
457      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
458
459  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
460    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
461
462  SDValue SuperReg = SDValue(Load, 0);
463  for (unsigned I = 0; I < NF; ++I) {
464    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
465    ReplaceUses(SDValue(Node, I),
466                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
467  }
468
469  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
470  CurDAG->RemoveDeadNode(Node);
471}
472
473void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
474                                    bool IsStrided) {
475  SDLoc DL(Node);
476  unsigned NF = Node->getNumOperands() - 4;
477  if (IsStrided)
478    NF--;
479  if (IsMasked)
480    NF--;
481  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
482  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
484  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
485  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
486
487  SmallVector<SDValue, 8> Operands;
488  Operands.push_back(StoreVal);
489  unsigned CurOp = 2 + NF;
490
491  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
492                             Operands);
493
494  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
495      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
496  MachineSDNode *Store =
497      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
498
499  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
500    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
501
502  ReplaceNode(Node, Store);
503}
504
505void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
506                                     bool IsOrdered) {
507  SDLoc DL(Node);
508  unsigned NF = Node->getNumOperands() - 5;
509  if (IsMasked)
510    --NF;
511  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
512  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
514  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
515  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
516
517  SmallVector<SDValue, 8> Operands;
518  Operands.push_back(StoreVal);
519  unsigned CurOp = 2 + NF;
520
521  MVT IndexVT;
522  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
523                             /*IsStridedOrIndexed*/ true, Operands,
524                             /*IsLoad=*/false, &IndexVT);
525
526  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
527         "Element count mismatch");
528
529  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
530  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
531  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
532    report_fatal_error("The V extension does not support EEW=64 for index "
533                       "values when XLEN=32");
534  }
535  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
536      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
537      static_cast<unsigned>(IndexLMUL));
538  MachineSDNode *Store =
539      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
540
541  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
542    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
543
544  ReplaceNode(Node, Store);
545}
546
547void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
548  if (!Subtarget->hasVInstructions())
549    return;
550
551  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
552
553  SDLoc DL(Node);
554  MVT XLenVT = Subtarget->getXLenVT();
555
556  unsigned IntNo = Node->getConstantOperandVal(0);
557
558  assert((IntNo == Intrinsic::riscv_vsetvli ||
559          IntNo == Intrinsic::riscv_vsetvlimax) &&
560         "Unexpected vsetvli intrinsic");
561
562  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
563  unsigned Offset = (VLMax ? 1 : 2);
564
565  assert(Node->getNumOperands() == Offset + 2 &&
566         "Unexpected number of operands");
567
568  unsigned SEW =
569      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
570  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
571      Node->getConstantOperandVal(Offset + 1) & 0x7);
572
573  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
574                                            /*MaskAgnostic*/ true);
575  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
576
577  SDValue VLOperand;
578  unsigned Opcode = RISCV::PseudoVSETVLI;
579  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
580    const unsigned VLEN = Subtarget->getRealMinVLen();
581    if (VLEN == Subtarget->getRealMaxVLen())
582      if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
583        VLMax = true;
584  }
585  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
586    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
587    Opcode = RISCV::PseudoVSETVLIX0;
588  } else {
589    VLOperand = Node->getOperand(1);
590
591    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
592      uint64_t AVL = C->getZExtValue();
593      if (isUInt<5>(AVL)) {
594        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
595        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
596                                                 XLenVT, VLImm, VTypeIOp));
597        return;
598      }
599    }
600  }
601
602  ReplaceNode(Node,
603              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
604}
605
606bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
607  MVT VT = Node->getSimpleValueType(0);
608  unsigned Opcode = Node->getOpcode();
609  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
610         "Unexpected opcode");
611  SDLoc DL(Node);
612
613  // For operations of the form (x << C1) op C2, check if we can use
614  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
615  SDValue N0 = Node->getOperand(0);
616  SDValue N1 = Node->getOperand(1);
617
618  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
619  if (!Cst)
620    return false;
621
622  int64_t Val = Cst->getSExtValue();
623
624  // Check if immediate can already use ANDI/ORI/XORI.
625  if (isInt<12>(Val))
626    return false;
627
628  SDValue Shift = N0;
629
630  // If Val is simm32 and we have a sext_inreg from i32, then the binop
631  // produces at least 33 sign bits. We can peek through the sext_inreg and use
632  // a SLLIW at the end.
633  bool SignExt = false;
634  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
635      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
636    SignExt = true;
637    Shift = N0.getOperand(0);
638  }
639
640  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
641    return false;
642
643  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
644  if (!ShlCst)
645    return false;
646
647  uint64_t ShAmt = ShlCst->getZExtValue();
648
649  // Make sure that we don't change the operation by removing bits.
650  // This only matters for OR and XOR, AND is unaffected.
651  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
652  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
653    return false;
654
655  int64_t ShiftedVal = Val >> ShAmt;
656  if (!isInt<12>(ShiftedVal))
657    return false;
658
659  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
660  if (SignExt && ShAmt >= 32)
661    return false;
662
663  // Ok, we can reorder to get a smaller immediate.
664  unsigned BinOpc;
665  switch (Opcode) {
666  default: llvm_unreachable("Unexpected opcode");
667  case ISD::AND: BinOpc = RISCV::ANDI; break;
668  case ISD::OR:  BinOpc = RISCV::ORI;  break;
669  case ISD::XOR: BinOpc = RISCV::XORI; break;
670  }
671
672  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
673
674  SDNode *BinOp =
675      CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
676                             CurDAG->getTargetConstant(ShiftedVal, DL, VT));
677  SDNode *SLLI =
678      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
679                             CurDAG->getTargetConstant(ShAmt, DL, VT));
680  ReplaceNode(Node, SLLI);
681  return true;
682}
683
684bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
685  // Only supported with XTHeadBb at the moment.
686  if (!Subtarget->hasVendorXTHeadBb())
687    return false;
688
689  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
690  if (!N1C)
691    return false;
692
693  SDValue N0 = Node->getOperand(0);
694  if (!N0.hasOneUse())
695    return false;
696
697  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
698                             MVT VT) {
699    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
700                                  CurDAG->getTargetConstant(Msb, DL, VT),
701                                  CurDAG->getTargetConstant(Lsb, DL, VT));
702  };
703
704  SDLoc DL(Node);
705  MVT VT = Node->getSimpleValueType(0);
706  const unsigned RightShAmt = N1C->getZExtValue();
707
708  // Transform (sra (shl X, C1) C2) with C1 < C2
709  //        -> (TH.EXT X, msb, lsb)
710  if (N0.getOpcode() == ISD::SHL) {
711    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
712    if (!N01C)
713      return false;
714
715    const unsigned LeftShAmt = N01C->getZExtValue();
716    // Make sure that this is a bitfield extraction (i.e., the shift-right
717    // amount can not be less than the left-shift).
718    if (LeftShAmt > RightShAmt)
719      return false;
720
721    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
722    const unsigned Msb = MsbPlusOne - 1;
723    const unsigned Lsb = RightShAmt - LeftShAmt;
724
725    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
726    ReplaceNode(Node, TH_EXT);
727    return true;
728  }
729
730  // Transform (sra (sext_inreg X, _), C) ->
731  //           (TH.EXT X, msb, lsb)
732  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
733    unsigned ExtSize =
734        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
735
736    // ExtSize of 32 should use sraiw via tablegen pattern.
737    if (ExtSize == 32)
738      return false;
739
740    const unsigned Msb = ExtSize - 1;
741    const unsigned Lsb = RightShAmt;
742
743    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
744    ReplaceNode(Node, TH_EXT);
745    return true;
746  }
747
748  return false;
749}
750
751bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
752  // Target does not support indexed loads.
753  if (!Subtarget->hasVendorXTHeadMemIdx())
754    return false;
755
756  LoadSDNode *Ld = cast<LoadSDNode>(Node);
757  ISD::MemIndexedMode AM = Ld->getAddressingMode();
758  if (AM == ISD::UNINDEXED)
759    return false;
760
761  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
762  if (!C)
763    return false;
764
765  EVT LoadVT = Ld->getMemoryVT();
766  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
767         "Unexpected addressing mode");
768  bool IsPre = AM == ISD::PRE_INC;
769  bool IsPost = AM == ISD::POST_INC;
770  int64_t Offset = C->getSExtValue();
771
772  // The constants that can be encoded in the THeadMemIdx instructions
773  // are of the form (sign_extend(imm5) << imm2).
774  int64_t Shift;
775  for (Shift = 0; Shift < 4; Shift++)
776    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
777      break;
778
779  // Constant cannot be encoded.
780  if (Shift == 4)
781    return false;
782
783  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
784  unsigned Opcode;
785  if (LoadVT == MVT::i8 && IsPre)
786    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
787  else if (LoadVT == MVT::i8 && IsPost)
788    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
789  else if (LoadVT == MVT::i16 && IsPre)
790    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
791  else if (LoadVT == MVT::i16 && IsPost)
792    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
793  else if (LoadVT == MVT::i32 && IsPre)
794    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
795  else if (LoadVT == MVT::i32 && IsPost)
796    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
797  else if (LoadVT == MVT::i64 && IsPre)
798    Opcode = RISCV::TH_LDIB;
799  else if (LoadVT == MVT::i64 && IsPost)
800    Opcode = RISCV::TH_LDIA;
801  else
802    return false;
803
804  EVT Ty = Ld->getOffset().getValueType();
805  SDValue Ops[] = {Ld->getBasePtr(),
806                   CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
807                   CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
808                   Ld->getChain()};
809  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
810                                       Ld->getValueType(1), MVT::Other, Ops);
811
812  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
813  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
814
815  ReplaceNode(Node, New);
816
817  return true;
818}
819
820void RISCVDAGToDAGISel::Select(SDNode *Node) {
821  // If we have a custom node, we have already selected.
822  if (Node->isMachineOpcode()) {
823    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
824    Node->setNodeId(-1);
825    return;
826  }
827
828  // Instruction Selection not handled by the auto-generated tablegen selection
829  // should be handled here.
830  unsigned Opcode = Node->getOpcode();
831  MVT XLenVT = Subtarget->getXLenVT();
832  SDLoc DL(Node);
833  MVT VT = Node->getSimpleValueType(0);
834
835  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
836
837  switch (Opcode) {
838  case ISD::Constant: {
839    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
840    auto *ConstNode = cast<ConstantSDNode>(Node);
841    if (ConstNode->isZero()) {
842      SDValue New =
843          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
844      ReplaceNode(Node, New.getNode());
845      return;
846    }
847    int64_t Imm = ConstNode->getSExtValue();
848    // If the upper XLen-16 bits are not used, try to convert this to a simm12
849    // by sign extending bit 15.
850    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
851        hasAllHUsers(Node))
852      Imm = SignExtend64<16>(Imm);
853    // If the upper 32-bits are not used try to convert this into a simm32 by
854    // sign extending bit 32.
855    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
856      Imm = SignExtend64<32>(Imm);
857
858    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
859    return;
860  }
861  case ISD::ConstantFP: {
862    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
863    auto [FPImm, NeedsFNeg] =
864        static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
865                                                                        VT);
866    if (FPImm >= 0) {
867      unsigned Opc;
868      unsigned FNegOpc;
869      switch (VT.SimpleTy) {
870      default:
871        llvm_unreachable("Unexpected size");
872      case MVT::f16:
873        Opc = RISCV::FLI_H;
874        FNegOpc = RISCV::FSGNJN_H;
875        break;
876      case MVT::f32:
877        Opc = RISCV::FLI_S;
878        FNegOpc = RISCV::FSGNJN_S;
879        break;
880      case MVT::f64:
881        Opc = RISCV::FLI_D;
882        FNegOpc = RISCV::FSGNJN_D;
883        break;
884      }
885      SDNode *Res = CurDAG->getMachineNode(
886          Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
887      if (NeedsFNeg)
888        Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
889                                     SDValue(Res, 0));
890
891      ReplaceNode(Node, Res);
892      return;
893    }
894
895    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
896    SDValue Imm;
897    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
898    // create an integer immediate.
899    if (APF.isPosZero() || NegZeroF64)
900      Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
901    else
902      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
903                      *Subtarget);
904
905    bool HasZdinx = Subtarget->hasStdExtZdinx();
906    bool Is64Bit = Subtarget->is64Bit();
907    unsigned Opc;
908    switch (VT.SimpleTy) {
909    default:
910      llvm_unreachable("Unexpected size");
911    case MVT::bf16:
912      assert(Subtarget->hasStdExtZfbfmin());
913      Opc = RISCV::FMV_H_X;
914      break;
915    case MVT::f16:
916      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
917      break;
918    case MVT::f32:
919      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
920      break;
921    case MVT::f64:
922      // For RV32, we can't move from a GPR, we need to convert instead. This
923      // should only happen for +0.0 and -0.0.
924      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
925      if (Is64Bit)
926        Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
927      else
928        Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
929      break;
930    }
931
932    SDNode *Res;
933    if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
934      Res = CurDAG->getMachineNode(
935          Opc, DL, VT, Imm,
936          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
937    else
938      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
939
940    // For f64 -0.0, we need to insert a fneg.d idiom.
941    if (NegZeroF64) {
942      Opc = RISCV::FSGNJN_D;
943      if (HasZdinx)
944        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
945      Res =
946          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
947    }
948
949    ReplaceNode(Node, Res);
950    return;
951  }
952  case RISCVISD::SplitF64: {
953    if (!Subtarget->hasStdExtZfa())
954      break;
955    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
956           "Unexpected subtarget");
957
958    // With Zfa, lower to fmv.x.w and fmvh.x.d.
959    if (!SDValue(Node, 0).use_empty()) {
960      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
961                                          Node->getOperand(0));
962      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
963    }
964    if (!SDValue(Node, 1).use_empty()) {
965      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
966                                          Node->getOperand(0));
967      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
968    }
969
970    CurDAG->RemoveDeadNode(Node);
971    return;
972  }
973  case ISD::SHL: {
974    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
975    if (!N1C)
976      break;
977    SDValue N0 = Node->getOperand(0);
978    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
979        !isa<ConstantSDNode>(N0.getOperand(1)))
980      break;
981    unsigned ShAmt = N1C->getZExtValue();
982    uint64_t Mask = N0.getConstantOperandVal(1);
983
984    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
985    // 32 leading zeros and C3 trailing zeros.
986    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
987      unsigned XLen = Subtarget->getXLen();
988      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
989      unsigned TrailingZeros = llvm::countr_zero(Mask);
990      if (TrailingZeros > 0 && LeadingZeros == 32) {
991        SDNode *SRLIW = CurDAG->getMachineNode(
992            RISCV::SRLIW, DL, VT, N0->getOperand(0),
993            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
994        SDNode *SLLI = CurDAG->getMachineNode(
995            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
996            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
997        ReplaceNode(Node, SLLI);
998        return;
999      }
1000    }
1001    break;
1002  }
1003  case ISD::SRL: {
1004    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1005    if (!N1C)
1006      break;
1007    SDValue N0 = Node->getOperand(0);
1008    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1009      break;
1010    unsigned ShAmt = N1C->getZExtValue();
1011    uint64_t Mask = N0.getConstantOperandVal(1);
1012
1013    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1014    // 32 leading zeros and C3 trailing zeros.
1015    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1016      unsigned XLen = Subtarget->getXLen();
1017      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1018      unsigned TrailingZeros = llvm::countr_zero(Mask);
1019      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1020        SDNode *SRLIW = CurDAG->getMachineNode(
1021            RISCV::SRLIW, DL, VT, N0->getOperand(0),
1022            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1023        SDNode *SLLI = CurDAG->getMachineNode(
1024            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1025            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1026        ReplaceNode(Node, SLLI);
1027        return;
1028      }
1029    }
1030
1031    // Optimize (srl (and X, C2), C) ->
1032    //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
1033    // Where C2 is a mask with C3 trailing ones.
1034    // Taking into account that the C2 may have had lower bits unset by
1035    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1036    // This pattern occurs when type legalizing right shifts for types with
1037    // less than XLen bits.
1038    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1039    if (!isMask_64(Mask))
1040      break;
1041    unsigned TrailingOnes = llvm::countr_one(Mask);
1042    if (ShAmt >= TrailingOnes)
1043      break;
1044    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1045    if (TrailingOnes == 32) {
1046      SDNode *SRLI = CurDAG->getMachineNode(
1047          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1048          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1049      ReplaceNode(Node, SRLI);
1050      return;
1051    }
1052
1053    // Only do the remaining transforms if the AND has one use.
1054    if (!N0.hasOneUse())
1055      break;
1056
1057    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1058    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1059      SDNode *BEXTI = CurDAG->getMachineNode(
1060          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1061          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1062      ReplaceNode(Node, BEXTI);
1063      return;
1064    }
1065
1066    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1067    SDNode *SLLI =
1068        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1069                               CurDAG->getTargetConstant(LShAmt, DL, VT));
1070    SDNode *SRLI = CurDAG->getMachineNode(
1071        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1072        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1073    ReplaceNode(Node, SRLI);
1074    return;
1075  }
1076  case ISD::SRA: {
1077    if (trySignedBitfieldExtract(Node))
1078      return;
1079
1080    // Optimize (sra (sext_inreg X, i16), C) ->
1081    //          (srai (slli X, (XLen-16), (XLen-16) + C)
1082    // And      (sra (sext_inreg X, i8), C) ->
1083    //          (srai (slli X, (XLen-8), (XLen-8) + C)
1084    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1085    // This transform matches the code we get without Zbb. The shifts are more
1086    // compressible, and this can help expose CSE opportunities in the sdiv by
1087    // constant optimization.
1088    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1089    if (!N1C)
1090      break;
1091    SDValue N0 = Node->getOperand(0);
1092    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1093      break;
1094    unsigned ShAmt = N1C->getZExtValue();
1095    unsigned ExtSize =
1096        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1097    // ExtSize of 32 should use sraiw via tablegen pattern.
1098    if (ExtSize >= 32 || ShAmt >= ExtSize)
1099      break;
1100    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1101    SDNode *SLLI =
1102        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1103                               CurDAG->getTargetConstant(LShAmt, DL, VT));
1104    SDNode *SRAI = CurDAG->getMachineNode(
1105        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1106        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1107    ReplaceNode(Node, SRAI);
1108    return;
1109  }
1110  case ISD::OR:
1111  case ISD::XOR:
1112    if (tryShrinkShlLogicImm(Node))
1113      return;
1114
1115    break;
1116  case ISD::AND: {
1117    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1118    if (!N1C)
1119      break;
1120    uint64_t C1 = N1C->getZExtValue();
1121    const bool isC1Mask = isMask_64(C1);
1122    const bool isC1ANDI = isInt<12>(C1);
1123
1124    SDValue N0 = Node->getOperand(0);
1125
1126    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1127                                          SDValue X, unsigned Msb,
1128                                          unsigned Lsb) {
1129      if (!Subtarget->hasVendorXTHeadBb())
1130        return false;
1131
1132      SDNode *TH_EXTU = CurDAG->getMachineNode(
1133          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1134          CurDAG->getTargetConstant(Lsb, DL, VT));
1135      ReplaceNode(Node, TH_EXTU);
1136      return true;
1137    };
1138
1139    bool LeftShift = N0.getOpcode() == ISD::SHL;
1140    if (LeftShift || N0.getOpcode() == ISD::SRL) {
1141      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1142      if (!C)
1143        break;
1144      unsigned C2 = C->getZExtValue();
1145      unsigned XLen = Subtarget->getXLen();
1146      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1147
1148      // Keep track of whether this is a c.andi. If we can't use c.andi, the
1149      // shift pair might offer more compression opportunities.
1150      // TODO: We could check for C extension here, but we don't have many lit
1151      // tests with the C extension enabled so not checking gets better
1152      // coverage.
1153      // TODO: What if ANDI faster than shift?
1154      bool IsCANDI = isInt<6>(N1C->getSExtValue());
1155
1156      // Clear irrelevant bits in the mask.
1157      if (LeftShift)
1158        C1 &= maskTrailingZeros<uint64_t>(C2);
1159      else
1160        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1161
1162      // Some transforms should only be done if the shift has a single use or
1163      // the AND would become (srli (slli X, 32), 32)
1164      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1165
1166      SDValue X = N0.getOperand(0);
1167
1168      // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1169      // with c3 leading zeros.
1170      if (!LeftShift && isC1Mask) {
1171        unsigned Leading = XLen - llvm::bit_width(C1);
1172        if (C2 < Leading) {
1173          // If the number of leading zeros is C2+32 this can be SRLIW.
1174          if (C2 + 32 == Leading) {
1175            SDNode *SRLIW = CurDAG->getMachineNode(
1176                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1177            ReplaceNode(Node, SRLIW);
1178            return;
1179          }
1180
1181          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1182          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1183          //
1184          // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1185          // legalized and goes through DAG combine.
1186          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1187              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1188              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1189            SDNode *SRAIW =
1190                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1191                                       CurDAG->getTargetConstant(31, DL, VT));
1192            SDNode *SRLIW = CurDAG->getMachineNode(
1193                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1194                CurDAG->getTargetConstant(Leading - 32, DL, VT));
1195            ReplaceNode(Node, SRLIW);
1196            return;
1197          }
1198
1199          // Try to use an unsigned bitfield extract (e.g., th.extu) if
1200          // available.
1201          // Transform (and (srl x, C2), C1)
1202          //        -> (<bfextract> x, msb, lsb)
1203          //
1204          // Make sure to keep this below the SRLIW cases, as we always want to
1205          // prefer the more common instruction.
1206          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1207          const unsigned Lsb = C2;
1208          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1209            return;
1210
1211          // (srli (slli x, c3-c2), c3).
1212          // Skip if we could use (zext.w (sraiw X, C2)).
1213          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1214                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1215                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1216          // Also Skip if we can use bexti or th.tst.
1217          Skip |= HasBitTest && Leading == XLen - 1;
1218          if (OneUseOrZExtW && !Skip) {
1219            SDNode *SLLI = CurDAG->getMachineNode(
1220                RISCV::SLLI, DL, VT, X,
1221                CurDAG->getTargetConstant(Leading - C2, DL, VT));
1222            SDNode *SRLI = CurDAG->getMachineNode(
1223                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1224                CurDAG->getTargetConstant(Leading, DL, VT));
1225            ReplaceNode(Node, SRLI);
1226            return;
1227          }
1228        }
1229      }
1230
1231      // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1232      // shifted by c2 bits with c3 leading zeros.
1233      if (LeftShift && isShiftedMask_64(C1)) {
1234        unsigned Leading = XLen - llvm::bit_width(C1);
1235
1236        if (C2 + Leading < XLen &&
1237            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1238          // Use slli.uw when possible.
1239          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1240            SDNode *SLLI_UW =
1241                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1242                                       CurDAG->getTargetConstant(C2, DL, VT));
1243            ReplaceNode(Node, SLLI_UW);
1244            return;
1245          }
1246
1247          // (srli (slli c2+c3), c3)
1248          if (OneUseOrZExtW && !IsCANDI) {
1249            SDNode *SLLI = CurDAG->getMachineNode(
1250                RISCV::SLLI, DL, VT, X,
1251                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1252            SDNode *SRLI = CurDAG->getMachineNode(
1253                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1254                CurDAG->getTargetConstant(Leading, DL, VT));
1255            ReplaceNode(Node, SRLI);
1256            return;
1257          }
1258        }
1259      }
1260
1261      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1262      // shifted mask with c2 leading zeros and c3 trailing zeros.
1263      if (!LeftShift && isShiftedMask_64(C1)) {
1264        unsigned Leading = XLen - llvm::bit_width(C1);
1265        unsigned Trailing = llvm::countr_zero(C1);
1266        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1267            !IsCANDI) {
1268          unsigned SrliOpc = RISCV::SRLI;
1269          // If the input is zexti32 we should use SRLIW.
1270          if (X.getOpcode() == ISD::AND &&
1271              isa<ConstantSDNode>(X.getOperand(1)) &&
1272              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1273            SrliOpc = RISCV::SRLIW;
1274            X = X.getOperand(0);
1275          }
1276          SDNode *SRLI = CurDAG->getMachineNode(
1277              SrliOpc, DL, VT, X,
1278              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1279          SDNode *SLLI = CurDAG->getMachineNode(
1280              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1281              CurDAG->getTargetConstant(Trailing, DL, VT));
1282          ReplaceNode(Node, SLLI);
1283          return;
1284        }
1285        // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1286        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1287            OneUseOrZExtW && !IsCANDI) {
1288          SDNode *SRLIW = CurDAG->getMachineNode(
1289              RISCV::SRLIW, DL, VT, X,
1290              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1291          SDNode *SLLI = CurDAG->getMachineNode(
1292              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1293              CurDAG->getTargetConstant(Trailing, DL, VT));
1294          ReplaceNode(Node, SLLI);
1295          return;
1296        }
1297      }
1298
1299      // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1300      // shifted mask with no leading zeros and c3 trailing zeros.
1301      if (LeftShift && isShiftedMask_64(C1)) {
1302        unsigned Leading = XLen - llvm::bit_width(C1);
1303        unsigned Trailing = llvm::countr_zero(C1);
1304        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1305          SDNode *SRLI = CurDAG->getMachineNode(
1306              RISCV::SRLI, DL, VT, X,
1307              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1308          SDNode *SLLI = CurDAG->getMachineNode(
1309              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1310              CurDAG->getTargetConstant(Trailing, DL, VT));
1311          ReplaceNode(Node, SLLI);
1312          return;
1313        }
1314        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1315        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1316          SDNode *SRLIW = CurDAG->getMachineNode(
1317              RISCV::SRLIW, DL, VT, X,
1318              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1319          SDNode *SLLI = CurDAG->getMachineNode(
1320              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1321              CurDAG->getTargetConstant(Trailing, DL, VT));
1322          ReplaceNode(Node, SLLI);
1323          return;
1324        }
1325      }
1326    }
1327
1328    // If C1 masks off the upper bits only (but can't be formed as an
1329    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1330    // available.
1331    // Transform (and x, C1)
1332    //        -> (<bfextract> x, msb, lsb)
1333    if (isC1Mask && !isC1ANDI) {
1334      const unsigned Msb = llvm::bit_width(C1) - 1;
1335      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1336        return;
1337    }
1338
1339    if (tryShrinkShlLogicImm(Node))
1340      return;
1341
1342    break;
1343  }
1344  case ISD::MUL: {
1345    // Special case for calculating (mul (and X, C2), C1) where the full product
1346    // fits in XLen bits. We can shift X left by the number of leading zeros in
1347    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1348    // product has XLen trailing zeros, putting it in the output of MULHU. This
1349    // can avoid materializing a constant in a register for C2.
1350
1351    // RHS should be a constant.
1352    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1353    if (!N1C || !N1C->hasOneUse())
1354      break;
1355
1356    // LHS should be an AND with constant.
1357    SDValue N0 = Node->getOperand(0);
1358    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1359      break;
1360
1361    uint64_t C2 = N0.getConstantOperandVal(1);
1362
1363    // Constant should be a mask.
1364    if (!isMask_64(C2))
1365      break;
1366
1367    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1368    // multiple users or the constant is a simm12. This prevents inserting a
1369    // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1370    // make it more costly to materialize. Otherwise, using a SLLI might allow
1371    // it to be compressed.
1372    bool IsANDIOrZExt =
1373        isInt<12>(C2) ||
1374        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1375    // With XTHeadBb, we can use TH.EXTU.
1376    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1377    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1378      break;
1379    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1380    // the constant is a simm32.
1381    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1382    // With XTHeadBb, we can use TH.EXTU.
1383    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1384    if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1385      break;
1386
1387    // We need to shift left the AND input and C1 by a total of XLen bits.
1388
1389    // How far left do we need to shift the AND input?
1390    unsigned XLen = Subtarget->getXLen();
1391    unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1392
1393    // The constant gets shifted by the remaining amount unless that would
1394    // shift bits out.
1395    uint64_t C1 = N1C->getZExtValue();
1396    unsigned ConstantShift = XLen - LeadingZeros;
1397    if (ConstantShift > (XLen - llvm::bit_width(C1)))
1398      break;
1399
1400    uint64_t ShiftedC1 = C1 << ConstantShift;
1401    // If this RV32, we need to sign extend the constant.
1402    if (XLen == 32)
1403      ShiftedC1 = SignExtend64<32>(ShiftedC1);
1404
1405    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1406    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1407    SDNode *SLLI =
1408        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1409                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1410    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1411                                           SDValue(SLLI, 0), SDValue(Imm, 0));
1412    ReplaceNode(Node, MULHU);
1413    return;
1414  }
1415  case ISD::LOAD: {
1416    if (tryIndexedLoad(Node))
1417      return;
1418    break;
1419  }
1420  case ISD::INTRINSIC_WO_CHAIN: {
1421    unsigned IntNo = Node->getConstantOperandVal(0);
1422    switch (IntNo) {
1423      // By default we do not custom select any intrinsic.
1424    default:
1425      break;
1426    case Intrinsic::riscv_vmsgeu:
1427    case Intrinsic::riscv_vmsge: {
1428      SDValue Src1 = Node->getOperand(1);
1429      SDValue Src2 = Node->getOperand(2);
1430      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1431      bool IsCmpUnsignedZero = false;
1432      // Only custom select scalar second operand.
1433      if (Src2.getValueType() != XLenVT)
1434        break;
1435      // Small constants are handled with patterns.
1436      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1437        int64_t CVal = C->getSExtValue();
1438        if (CVal >= -15 && CVal <= 16) {
1439          if (!IsUnsigned || CVal != 0)
1440            break;
1441          IsCmpUnsignedZero = true;
1442        }
1443      }
1444      MVT Src1VT = Src1.getSimpleValueType();
1445      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1446      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1447      default:
1448        llvm_unreachable("Unexpected LMUL!");
1449#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
1450  case RISCVII::VLMUL::lmulenum:                                               \
1451    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1452                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
1453    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1454    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1455    break;
1456        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1457        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1458        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1459        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1460        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1461        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1462        CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1463#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1464      }
1465      SDValue SEW = CurDAG->getTargetConstant(
1466          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1467      SDValue VL;
1468      selectVLOp(Node->getOperand(3), VL);
1469
1470      // If vmsgeu with 0 immediate, expand it to vmset.
1471      if (IsCmpUnsignedZero) {
1472        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1473        return;
1474      }
1475
1476      // Expand to
1477      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1478      SDValue Cmp = SDValue(
1479          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1480          0);
1481      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1482                                               {Cmp, Cmp, VL, SEW}));
1483      return;
1484    }
1485    case Intrinsic::riscv_vmsgeu_mask:
1486    case Intrinsic::riscv_vmsge_mask: {
1487      SDValue Src1 = Node->getOperand(2);
1488      SDValue Src2 = Node->getOperand(3);
1489      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1490      bool IsCmpUnsignedZero = false;
1491      // Only custom select scalar second operand.
1492      if (Src2.getValueType() != XLenVT)
1493        break;
1494      // Small constants are handled with patterns.
1495      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1496        int64_t CVal = C->getSExtValue();
1497        if (CVal >= -15 && CVal <= 16) {
1498          if (!IsUnsigned || CVal != 0)
1499            break;
1500          IsCmpUnsignedZero = true;
1501        }
1502      }
1503      MVT Src1VT = Src1.getSimpleValueType();
1504      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1505          VMOROpcode;
1506      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1507      default:
1508        llvm_unreachable("Unexpected LMUL!");
1509#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
1510  case RISCVII::VLMUL::lmulenum:                                               \
1511    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1512                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
1513    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1514                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1515    break;
1516        CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1517        CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1518        CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1519        CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1520        CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1521        CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1522        CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1523#undef CASE_VMSLT_OPCODES
1524      }
1525      // Mask operations use the LMUL from the mask type.
1526      switch (RISCVTargetLowering::getLMUL(VT)) {
1527      default:
1528        llvm_unreachable("Unexpected LMUL!");
1529#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
1530  case RISCVII::VLMUL::lmulenum:                                               \
1531    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1532    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1533    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
1534    break;
1535        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1536        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1537        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1538        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1539        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1540        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1541        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1542#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1543      }
1544      SDValue SEW = CurDAG->getTargetConstant(
1545          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1546      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1547      SDValue VL;
1548      selectVLOp(Node->getOperand(5), VL);
1549      SDValue MaskedOff = Node->getOperand(1);
1550      SDValue Mask = Node->getOperand(4);
1551
1552      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1553      if (IsCmpUnsignedZero) {
1554        // We don't need vmor if the MaskedOff and the Mask are the same
1555        // value.
1556        if (Mask == MaskedOff) {
1557          ReplaceUses(Node, Mask.getNode());
1558          return;
1559        }
1560        ReplaceNode(Node,
1561                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
1562                                           {Mask, MaskedOff, VL, MaskSEW}));
1563        return;
1564      }
1565
1566      // If the MaskedOff value and the Mask are the same value use
1567      // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1568      // This avoids needing to copy v0 to vd before starting the next sequence.
1569      if (Mask == MaskedOff) {
1570        SDValue Cmp = SDValue(
1571            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1572            0);
1573        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1574                                                 {Mask, Cmp, VL, MaskSEW}));
1575        return;
1576      }
1577
1578      // Mask needs to be copied to V0.
1579      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1580                                           RISCV::V0, Mask, SDValue());
1581      SDValue Glue = Chain.getValue(1);
1582      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1583
1584      // Otherwise use
1585      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1586      // The result is mask undisturbed.
1587      // We use the same instructions to emulate mask agnostic behavior, because
1588      // the agnostic result can be either undisturbed or all 1.
1589      SDValue Cmp = SDValue(
1590          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1591                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1592          0);
1593      // vmxor.mm vd, vd, v0 is used to update active value.
1594      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1595                                               {Cmp, Mask, VL, MaskSEW}));
1596      return;
1597    }
1598    case Intrinsic::riscv_vsetvli:
1599    case Intrinsic::riscv_vsetvlimax:
1600      return selectVSETVLI(Node);
1601    }
1602    break;
1603  }
1604  case ISD::INTRINSIC_W_CHAIN: {
1605    unsigned IntNo = Node->getConstantOperandVal(1);
1606    switch (IntNo) {
1607      // By default we do not custom select any intrinsic.
1608    default:
1609      break;
1610    case Intrinsic::riscv_vlseg2:
1611    case Intrinsic::riscv_vlseg3:
1612    case Intrinsic::riscv_vlseg4:
1613    case Intrinsic::riscv_vlseg5:
1614    case Intrinsic::riscv_vlseg6:
1615    case Intrinsic::riscv_vlseg7:
1616    case Intrinsic::riscv_vlseg8: {
1617      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1618      return;
1619    }
1620    case Intrinsic::riscv_vlseg2_mask:
1621    case Intrinsic::riscv_vlseg3_mask:
1622    case Intrinsic::riscv_vlseg4_mask:
1623    case Intrinsic::riscv_vlseg5_mask:
1624    case Intrinsic::riscv_vlseg6_mask:
1625    case Intrinsic::riscv_vlseg7_mask:
1626    case Intrinsic::riscv_vlseg8_mask: {
1627      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1628      return;
1629    }
1630    case Intrinsic::riscv_vlsseg2:
1631    case Intrinsic::riscv_vlsseg3:
1632    case Intrinsic::riscv_vlsseg4:
1633    case Intrinsic::riscv_vlsseg5:
1634    case Intrinsic::riscv_vlsseg6:
1635    case Intrinsic::riscv_vlsseg7:
1636    case Intrinsic::riscv_vlsseg8: {
1637      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1638      return;
1639    }
1640    case Intrinsic::riscv_vlsseg2_mask:
1641    case Intrinsic::riscv_vlsseg3_mask:
1642    case Intrinsic::riscv_vlsseg4_mask:
1643    case Intrinsic::riscv_vlsseg5_mask:
1644    case Intrinsic::riscv_vlsseg6_mask:
1645    case Intrinsic::riscv_vlsseg7_mask:
1646    case Intrinsic::riscv_vlsseg8_mask: {
1647      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1648      return;
1649    }
1650    case Intrinsic::riscv_vloxseg2:
1651    case Intrinsic::riscv_vloxseg3:
1652    case Intrinsic::riscv_vloxseg4:
1653    case Intrinsic::riscv_vloxseg5:
1654    case Intrinsic::riscv_vloxseg6:
1655    case Intrinsic::riscv_vloxseg7:
1656    case Intrinsic::riscv_vloxseg8:
1657      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1658      return;
1659    case Intrinsic::riscv_vluxseg2:
1660    case Intrinsic::riscv_vluxseg3:
1661    case Intrinsic::riscv_vluxseg4:
1662    case Intrinsic::riscv_vluxseg5:
1663    case Intrinsic::riscv_vluxseg6:
1664    case Intrinsic::riscv_vluxseg7:
1665    case Intrinsic::riscv_vluxseg8:
1666      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1667      return;
1668    case Intrinsic::riscv_vloxseg2_mask:
1669    case Intrinsic::riscv_vloxseg3_mask:
1670    case Intrinsic::riscv_vloxseg4_mask:
1671    case Intrinsic::riscv_vloxseg5_mask:
1672    case Intrinsic::riscv_vloxseg6_mask:
1673    case Intrinsic::riscv_vloxseg7_mask:
1674    case Intrinsic::riscv_vloxseg8_mask:
1675      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1676      return;
1677    case Intrinsic::riscv_vluxseg2_mask:
1678    case Intrinsic::riscv_vluxseg3_mask:
1679    case Intrinsic::riscv_vluxseg4_mask:
1680    case Intrinsic::riscv_vluxseg5_mask:
1681    case Intrinsic::riscv_vluxseg6_mask:
1682    case Intrinsic::riscv_vluxseg7_mask:
1683    case Intrinsic::riscv_vluxseg8_mask:
1684      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1685      return;
1686    case Intrinsic::riscv_vlseg8ff:
1687    case Intrinsic::riscv_vlseg7ff:
1688    case Intrinsic::riscv_vlseg6ff:
1689    case Intrinsic::riscv_vlseg5ff:
1690    case Intrinsic::riscv_vlseg4ff:
1691    case Intrinsic::riscv_vlseg3ff:
1692    case Intrinsic::riscv_vlseg2ff: {
1693      selectVLSEGFF(Node, /*IsMasked*/ false);
1694      return;
1695    }
1696    case Intrinsic::riscv_vlseg8ff_mask:
1697    case Intrinsic::riscv_vlseg7ff_mask:
1698    case Intrinsic::riscv_vlseg6ff_mask:
1699    case Intrinsic::riscv_vlseg5ff_mask:
1700    case Intrinsic::riscv_vlseg4ff_mask:
1701    case Intrinsic::riscv_vlseg3ff_mask:
1702    case Intrinsic::riscv_vlseg2ff_mask: {
1703      selectVLSEGFF(Node, /*IsMasked*/ true);
1704      return;
1705    }
1706    case Intrinsic::riscv_vloxei:
1707    case Intrinsic::riscv_vloxei_mask:
1708    case Intrinsic::riscv_vluxei:
1709    case Intrinsic::riscv_vluxei_mask: {
1710      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1711                      IntNo == Intrinsic::riscv_vluxei_mask;
1712      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1713                       IntNo == Intrinsic::riscv_vloxei_mask;
1714
1715      MVT VT = Node->getSimpleValueType(0);
1716      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1717
1718      unsigned CurOp = 2;
1719      SmallVector<SDValue, 8> Operands;
1720      Operands.push_back(Node->getOperand(CurOp++));
1721
1722      MVT IndexVT;
1723      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1724                                 /*IsStridedOrIndexed*/ true, Operands,
1725                                 /*IsLoad=*/true, &IndexVT);
1726
1727      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1728             "Element count mismatch");
1729
1730      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1731      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1732      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1733      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1734        report_fatal_error("The V extension does not support EEW=64 for index "
1735                           "values when XLEN=32");
1736      }
1737      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1738          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1739          static_cast<unsigned>(IndexLMUL));
1740      MachineSDNode *Load =
1741          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1742
1743      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1744        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1745
1746      ReplaceNode(Node, Load);
1747      return;
1748    }
1749    case Intrinsic::riscv_vlm:
1750    case Intrinsic::riscv_vle:
1751    case Intrinsic::riscv_vle_mask:
1752    case Intrinsic::riscv_vlse:
1753    case Intrinsic::riscv_vlse_mask: {
1754      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1755                      IntNo == Intrinsic::riscv_vlse_mask;
1756      bool IsStrided =
1757          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1758
1759      MVT VT = Node->getSimpleValueType(0);
1760      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1761
1762      // The riscv_vlm intrinsic are always tail agnostic and no passthru
1763      // operand at the IR level.  In pseudos, they have both policy and
1764      // passthru operand. The passthru operand is needed to track the
1765      // "tail undefined" state, and the policy is there just for
1766      // for consistency - it will always be "don't care" for the
1767      // unmasked form.
1768      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1769      unsigned CurOp = 2;
1770      SmallVector<SDValue, 8> Operands;
1771      if (HasPassthruOperand)
1772        Operands.push_back(Node->getOperand(CurOp++));
1773      else {
1774        // We eagerly lower to implicit_def (instead of undef), as we
1775        // otherwise fail to select nodes such as: nxv1i1 = undef
1776        SDNode *Passthru =
1777          CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1778        Operands.push_back(SDValue(Passthru, 0));
1779      }
1780      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1781                                 Operands, /*IsLoad=*/true);
1782
1783      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1784      const RISCV::VLEPseudo *P =
1785          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1786                              static_cast<unsigned>(LMUL));
1787      MachineSDNode *Load =
1788          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1789
1790      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1791        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1792
1793      ReplaceNode(Node, Load);
1794      return;
1795    }
1796    case Intrinsic::riscv_vleff:
1797    case Intrinsic::riscv_vleff_mask: {
1798      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1799
1800      MVT VT = Node->getSimpleValueType(0);
1801      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1802
1803      unsigned CurOp = 2;
1804      SmallVector<SDValue, 7> Operands;
1805      Operands.push_back(Node->getOperand(CurOp++));
1806      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1807                                 /*IsStridedOrIndexed*/ false, Operands,
1808                                 /*IsLoad=*/true);
1809
1810      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1811      const RISCV::VLEPseudo *P =
1812          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1813                              Log2SEW, static_cast<unsigned>(LMUL));
1814      MachineSDNode *Load = CurDAG->getMachineNode(
1815          P->Pseudo, DL, Node->getVTList(), Operands);
1816      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1817        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1818
1819      ReplaceNode(Node, Load);
1820      return;
1821    }
1822    }
1823    break;
1824  }
1825  case ISD::INTRINSIC_VOID: {
1826    unsigned IntNo = Node->getConstantOperandVal(1);
1827    switch (IntNo) {
1828    case Intrinsic::riscv_vsseg2:
1829    case Intrinsic::riscv_vsseg3:
1830    case Intrinsic::riscv_vsseg4:
1831    case Intrinsic::riscv_vsseg5:
1832    case Intrinsic::riscv_vsseg6:
1833    case Intrinsic::riscv_vsseg7:
1834    case Intrinsic::riscv_vsseg8: {
1835      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1836      return;
1837    }
1838    case Intrinsic::riscv_vsseg2_mask:
1839    case Intrinsic::riscv_vsseg3_mask:
1840    case Intrinsic::riscv_vsseg4_mask:
1841    case Intrinsic::riscv_vsseg5_mask:
1842    case Intrinsic::riscv_vsseg6_mask:
1843    case Intrinsic::riscv_vsseg7_mask:
1844    case Intrinsic::riscv_vsseg8_mask: {
1845      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1846      return;
1847    }
1848    case Intrinsic::riscv_vssseg2:
1849    case Intrinsic::riscv_vssseg3:
1850    case Intrinsic::riscv_vssseg4:
1851    case Intrinsic::riscv_vssseg5:
1852    case Intrinsic::riscv_vssseg6:
1853    case Intrinsic::riscv_vssseg7:
1854    case Intrinsic::riscv_vssseg8: {
1855      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1856      return;
1857    }
1858    case Intrinsic::riscv_vssseg2_mask:
1859    case Intrinsic::riscv_vssseg3_mask:
1860    case Intrinsic::riscv_vssseg4_mask:
1861    case Intrinsic::riscv_vssseg5_mask:
1862    case Intrinsic::riscv_vssseg6_mask:
1863    case Intrinsic::riscv_vssseg7_mask:
1864    case Intrinsic::riscv_vssseg8_mask: {
1865      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1866      return;
1867    }
1868    case Intrinsic::riscv_vsoxseg2:
1869    case Intrinsic::riscv_vsoxseg3:
1870    case Intrinsic::riscv_vsoxseg4:
1871    case Intrinsic::riscv_vsoxseg5:
1872    case Intrinsic::riscv_vsoxseg6:
1873    case Intrinsic::riscv_vsoxseg7:
1874    case Intrinsic::riscv_vsoxseg8:
1875      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1876      return;
1877    case Intrinsic::riscv_vsuxseg2:
1878    case Intrinsic::riscv_vsuxseg3:
1879    case Intrinsic::riscv_vsuxseg4:
1880    case Intrinsic::riscv_vsuxseg5:
1881    case Intrinsic::riscv_vsuxseg6:
1882    case Intrinsic::riscv_vsuxseg7:
1883    case Intrinsic::riscv_vsuxseg8:
1884      selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1885      return;
1886    case Intrinsic::riscv_vsoxseg2_mask:
1887    case Intrinsic::riscv_vsoxseg3_mask:
1888    case Intrinsic::riscv_vsoxseg4_mask:
1889    case Intrinsic::riscv_vsoxseg5_mask:
1890    case Intrinsic::riscv_vsoxseg6_mask:
1891    case Intrinsic::riscv_vsoxseg7_mask:
1892    case Intrinsic::riscv_vsoxseg8_mask:
1893      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1894      return;
1895    case Intrinsic::riscv_vsuxseg2_mask:
1896    case Intrinsic::riscv_vsuxseg3_mask:
1897    case Intrinsic::riscv_vsuxseg4_mask:
1898    case Intrinsic::riscv_vsuxseg5_mask:
1899    case Intrinsic::riscv_vsuxseg6_mask:
1900    case Intrinsic::riscv_vsuxseg7_mask:
1901    case Intrinsic::riscv_vsuxseg8_mask:
1902      selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1903      return;
1904    case Intrinsic::riscv_vsoxei:
1905    case Intrinsic::riscv_vsoxei_mask:
1906    case Intrinsic::riscv_vsuxei:
1907    case Intrinsic::riscv_vsuxei_mask: {
1908      bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1909                      IntNo == Intrinsic::riscv_vsuxei_mask;
1910      bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1911                       IntNo == Intrinsic::riscv_vsoxei_mask;
1912
1913      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1914      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1915
1916      unsigned CurOp = 2;
1917      SmallVector<SDValue, 8> Operands;
1918      Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1919
1920      MVT IndexVT;
1921      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1922                                 /*IsStridedOrIndexed*/ true, Operands,
1923                                 /*IsLoad=*/false, &IndexVT);
1924
1925      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1926             "Element count mismatch");
1927
1928      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1929      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1930      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1931      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1932        report_fatal_error("The V extension does not support EEW=64 for index "
1933                           "values when XLEN=32");
1934      }
1935      const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1936          IsMasked, IsOrdered, IndexLog2EEW,
1937          static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1938      MachineSDNode *Store =
1939          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1940
1941      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1942        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1943
1944      ReplaceNode(Node, Store);
1945      return;
1946    }
1947    case Intrinsic::riscv_vsm:
1948    case Intrinsic::riscv_vse:
1949    case Intrinsic::riscv_vse_mask:
1950    case Intrinsic::riscv_vsse:
1951    case Intrinsic::riscv_vsse_mask: {
1952      bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1953                      IntNo == Intrinsic::riscv_vsse_mask;
1954      bool IsStrided =
1955          IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1956
1957      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1958      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1959
1960      unsigned CurOp = 2;
1961      SmallVector<SDValue, 8> Operands;
1962      Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1963
1964      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1965                                 Operands);
1966
1967      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1968      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1969          IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1970      MachineSDNode *Store =
1971          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1972      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1973        CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1974
1975      ReplaceNode(Node, Store);
1976      return;
1977    }
1978    }
1979    break;
1980  }
1981  case ISD::BITCAST: {
1982    MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1983    // Just drop bitcasts between vectors if both are fixed or both are
1984    // scalable.
1985    if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1986        (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1987      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1988      CurDAG->RemoveDeadNode(Node);
1989      return;
1990    }
1991    break;
1992  }
1993  case ISD::INSERT_SUBVECTOR: {
1994    SDValue V = Node->getOperand(0);
1995    SDValue SubV = Node->getOperand(1);
1996    SDLoc DL(SubV);
1997    auto Idx = Node->getConstantOperandVal(2);
1998    MVT SubVecVT = SubV.getSimpleValueType();
1999
2000    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2001    MVT SubVecContainerVT = SubVecVT;
2002    // Establish the correct scalable-vector types for any fixed-length type.
2003    if (SubVecVT.isFixedLengthVector())
2004      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2005    if (VT.isFixedLengthVector())
2006      VT = TLI.getContainerForFixedLengthVector(VT);
2007
2008    const auto *TRI = Subtarget->getRegisterInfo();
2009    unsigned SubRegIdx;
2010    std::tie(SubRegIdx, Idx) =
2011        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2012            VT, SubVecContainerVT, Idx, TRI);
2013
2014    // If the Idx hasn't been completely eliminated then this is a subvector
2015    // insert which doesn't naturally align to a vector register. These must
2016    // be handled using instructions to manipulate the vector registers.
2017    if (Idx != 0)
2018      break;
2019
2020    RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2021    bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2022                           SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2023                           SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2024    (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
2025    assert((!IsSubVecPartReg || V.isUndef()) &&
2026           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2027           "the subvector is smaller than a full-sized register");
2028
2029    // If we haven't set a SubRegIdx, then we must be going between
2030    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2031    if (SubRegIdx == RISCV::NoSubRegister) {
2032      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
2033      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2034                 InRegClassID &&
2035             "Unexpected subvector extraction");
2036      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2037      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2038                                               DL, VT, SubV, RC);
2039      ReplaceNode(Node, NewNode);
2040      return;
2041    }
2042
2043    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2044    ReplaceNode(Node, Insert.getNode());
2045    return;
2046  }
2047  case ISD::EXTRACT_SUBVECTOR: {
2048    SDValue V = Node->getOperand(0);
2049    auto Idx = Node->getConstantOperandVal(1);
2050    MVT InVT = V.getSimpleValueType();
2051    SDLoc DL(V);
2052
2053    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2054    MVT SubVecContainerVT = VT;
2055    // Establish the correct scalable-vector types for any fixed-length type.
2056    if (VT.isFixedLengthVector())
2057      SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2058    if (InVT.isFixedLengthVector())
2059      InVT = TLI.getContainerForFixedLengthVector(InVT);
2060
2061    const auto *TRI = Subtarget->getRegisterInfo();
2062    unsigned SubRegIdx;
2063    std::tie(SubRegIdx, Idx) =
2064        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2065            InVT, SubVecContainerVT, Idx, TRI);
2066
2067    // If the Idx hasn't been completely eliminated then this is a subvector
2068    // extract which doesn't naturally align to a vector register. These must
2069    // be handled using instructions to manipulate the vector registers.
2070    if (Idx != 0)
2071      break;
2072
2073    // If we haven't set a SubRegIdx, then we must be going between
2074    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2075    if (SubRegIdx == RISCV::NoSubRegister) {
2076      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2077      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2078                 InRegClassID &&
2079             "Unexpected subvector extraction");
2080      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2081      SDNode *NewNode =
2082          CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2083      ReplaceNode(Node, NewNode);
2084      return;
2085    }
2086
2087    SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2088    ReplaceNode(Node, Extract.getNode());
2089    return;
2090  }
2091  case RISCVISD::VMV_S_X_VL:
2092  case RISCVISD::VFMV_S_F_VL:
2093  case RISCVISD::VMV_V_X_VL:
2094  case RISCVISD::VFMV_V_F_VL: {
2095    // Try to match splat of a scalar load to a strided load with stride of x0.
2096    bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2097                        Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2098    if (!Node->getOperand(0).isUndef())
2099      break;
2100    SDValue Src = Node->getOperand(1);
2101    auto *Ld = dyn_cast<LoadSDNode>(Src);
2102    // Can't fold load update node because the second
2103    // output is used so that load update node can't be removed.
2104    if (!Ld || Ld->isIndexed())
2105      break;
2106    EVT MemVT = Ld->getMemoryVT();
2107    // The memory VT should be the same size as the element type.
2108    if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2109      break;
2110    if (!IsProfitableToFold(Src, Node, Node) ||
2111        !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2112      break;
2113
2114    SDValue VL;
2115    if (IsScalarMove) {
2116      // We could deal with more VL if we update the VSETVLI insert pass to
2117      // avoid introducing more VSETVLI.
2118      if (!isOneConstant(Node->getOperand(2)))
2119        break;
2120      selectVLOp(Node->getOperand(2), VL);
2121    } else
2122      selectVLOp(Node->getOperand(2), VL);
2123
2124    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2125    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2126
2127    // If VL=1, then we don't need to do a strided load and can just do a
2128    // regular load.
2129    bool IsStrided = !isOneConstant(VL);
2130
2131    // Only do a strided load if we have optimized zero-stride vector load.
2132    if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2133      break;
2134
2135    SmallVector<SDValue> Operands = {
2136        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2137        Ld->getBasePtr()};
2138    if (IsStrided)
2139      Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2140    uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2141    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2142    Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2143
2144    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2145    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2146        /*IsMasked*/ false, IsStrided, /*FF*/ false,
2147        Log2SEW, static_cast<unsigned>(LMUL));
2148    MachineSDNode *Load =
2149        CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2150    // Update the chain.
2151    ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2152    // Record the mem-refs
2153    CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2154    // Replace the splat with the vlse.
2155    ReplaceNode(Node, Load);
2156    return;
2157  }
2158  case ISD::PREFETCH:
2159    unsigned Locality = Node->getConstantOperandVal(3);
2160    if (Locality > 2)
2161      break;
2162
2163    if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2164      MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2165      MMO->setFlags(MachineMemOperand::MONonTemporal);
2166
2167      int NontemporalLevel = 0;
2168      switch (Locality) {
2169      case 0:
2170        NontemporalLevel = 3; // NTL.ALL
2171        break;
2172      case 1:
2173        NontemporalLevel = 1; // NTL.PALL
2174        break;
2175      case 2:
2176        NontemporalLevel = 0; // NTL.P1
2177        break;
2178      default:
2179        llvm_unreachable("unexpected locality value.");
2180      }
2181
2182      if (NontemporalLevel & 0b1)
2183        MMO->setFlags(MONontemporalBit0);
2184      if (NontemporalLevel & 0b10)
2185        MMO->setFlags(MONontemporalBit1);
2186    }
2187    break;
2188  }
2189
2190  // Select the default instruction.
2191  SelectCode(Node);
2192}
2193
2194bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2195    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2196    std::vector<SDValue> &OutOps) {
2197  // Always produce a register and immediate operand, as expected by
2198  // RISCVAsmPrinter::PrintAsmMemoryOperand.
2199  switch (ConstraintID) {
2200  case InlineAsm::ConstraintCode::o:
2201  case InlineAsm::ConstraintCode::m: {
2202    SDValue Op0, Op1;
2203    bool Found = SelectAddrRegImm(Op, Op0, Op1);
2204    assert(Found && "SelectAddrRegImm should always succeed");
2205    (void)Found;
2206    OutOps.push_back(Op0);
2207    OutOps.push_back(Op1);
2208    return false;
2209  }
2210  case InlineAsm::ConstraintCode::A:
2211    OutOps.push_back(Op);
2212    OutOps.push_back(
2213        CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2214    return false;
2215  default:
2216    report_fatal_error("Unexpected asm memory constraint " +
2217                       InlineAsm::getMemConstraintName(ConstraintID));
2218  }
2219
2220  return true;
2221}
2222
2223bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2224                                             SDValue &Offset) {
2225  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2226    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2227    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2228    return true;
2229  }
2230
2231  return false;
2232}
2233
2234// Select a frame index and an optional immediate offset from an ADD or OR.
2235bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2236                                              SDValue &Offset) {
2237  if (SelectAddrFrameIndex(Addr, Base, Offset))
2238    return true;
2239
2240  if (!CurDAG->isBaseWithConstantOffset(Addr))
2241    return false;
2242
2243  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2244    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2245    if (isInt<12>(CVal)) {
2246      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2247                                         Subtarget->getXLenVT());
2248      Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2249                                         Subtarget->getXLenVT());
2250      return true;
2251    }
2252  }
2253
2254  return false;
2255}
2256
2257// Fold constant addresses.
2258static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2259                               const MVT VT, const RISCVSubtarget *Subtarget,
2260                               SDValue Addr, SDValue &Base, SDValue &Offset,
2261                               bool IsPrefetch = false) {
2262  if (!isa<ConstantSDNode>(Addr))
2263    return false;
2264
2265  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2266
2267  // If the constant is a simm12, we can fold the whole constant and use X0 as
2268  // the base. If the constant can be materialized with LUI+simm12, use LUI as
2269  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2270  int64_t Lo12 = SignExtend64<12>(CVal);
2271  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2272  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2273    if (IsPrefetch && (Lo12 & 0b11111) != 0)
2274      return false;
2275
2276    if (Hi) {
2277      int64_t Hi20 = (Hi >> 12) & 0xfffff;
2278      Base = SDValue(
2279          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2280                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
2281          0);
2282    } else {
2283      Base = CurDAG->getRegister(RISCV::X0, VT);
2284    }
2285    Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2286    return true;
2287  }
2288
2289  // Ask how constant materialization would handle this constant.
2290  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2291
2292  // If the last instruction would be an ADDI, we can fold its immediate and
2293  // emit the rest of the sequence as the base.
2294  if (Seq.back().getOpcode() != RISCV::ADDI)
2295    return false;
2296  Lo12 = Seq.back().getImm();
2297  if (IsPrefetch && (Lo12 & 0b11111) != 0)
2298    return false;
2299
2300  // Drop the last instruction.
2301  Seq.pop_back();
2302  assert(!Seq.empty() && "Expected more instructions in sequence");
2303
2304  Base = selectImmSeq(CurDAG, DL, VT, Seq);
2305  Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2306  return true;
2307}
2308
2309// Is this ADD instruction only used as the base pointer of scalar loads and
2310// stores?
2311static bool isWorthFoldingAdd(SDValue Add) {
2312  for (auto *Use : Add->uses()) {
2313    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2314        Use->getOpcode() != ISD::ATOMIC_LOAD &&
2315        Use->getOpcode() != ISD::ATOMIC_STORE)
2316      return false;
2317    EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2318    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2319        VT != MVT::f64)
2320      return false;
2321    // Don't allow stores of the value. It must be used as the address.
2322    if (Use->getOpcode() == ISD::STORE &&
2323        cast<StoreSDNode>(Use)->getValue() == Add)
2324      return false;
2325    if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2326        cast<AtomicSDNode>(Use)->getVal() == Add)
2327      return false;
2328  }
2329
2330  return true;
2331}
2332
2333bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2334                                              unsigned MaxShiftAmount,
2335                                              SDValue &Base, SDValue &Index,
2336                                              SDValue &Scale) {
2337  EVT VT = Addr.getSimpleValueType();
2338  auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2339                                              SDValue &Shift) {
2340    uint64_t ShiftAmt = 0;
2341    Index = N;
2342
2343    if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2344      // Only match shifts by a value in range [0, MaxShiftAmount].
2345      if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2346        Index = N.getOperand(0);
2347        ShiftAmt = N.getConstantOperandVal(1);
2348      }
2349    }
2350
2351    Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2352    return ShiftAmt != 0;
2353  };
2354
2355  if (Addr.getOpcode() == ISD::ADD) {
2356    if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2357      SDValue AddrB = Addr.getOperand(0);
2358      if (AddrB.getOpcode() == ISD::ADD &&
2359          UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2360          !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2361          isInt<12>(C1->getSExtValue())) {
2362        // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2363        SDValue C1Val =
2364            CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2365        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2366                                              AddrB.getOperand(1), C1Val),
2367                       0);
2368        return true;
2369      }
2370    } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2371      Base = Addr.getOperand(1);
2372      return true;
2373    } else {
2374      UnwrapShl(Addr.getOperand(1), Index, Scale);
2375      Base = Addr.getOperand(0);
2376      return true;
2377    }
2378  } else if (UnwrapShl(Addr, Index, Scale)) {
2379    EVT VT = Addr.getValueType();
2380    Base = CurDAG->getRegister(RISCV::X0, VT);
2381    return true;
2382  }
2383
2384  return false;
2385}
2386
2387bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2388                                         SDValue &Offset, bool IsINX) {
2389  if (SelectAddrFrameIndex(Addr, Base, Offset))
2390    return true;
2391
2392  SDLoc DL(Addr);
2393  MVT VT = Addr.getSimpleValueType();
2394
2395  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2396    Base = Addr.getOperand(0);
2397    Offset = Addr.getOperand(1);
2398    return true;
2399  }
2400
2401  int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2402  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2403    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2404    if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2405      Base = Addr.getOperand(0);
2406      if (Base.getOpcode() == RISCVISD::ADD_LO) {
2407        SDValue LoOperand = Base.getOperand(1);
2408        if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2409          // If the Lo in (ADD_LO hi, lo) is a global variable's address
2410          // (its low part, really), then we can rely on the alignment of that
2411          // variable to provide a margin of safety before low part can overflow
2412          // the 12 bits of the load/store offset. Check if CVal falls within
2413          // that margin; if so (low part + CVal) can't overflow.
2414          const DataLayout &DL = CurDAG->getDataLayout();
2415          Align Alignment = commonAlignment(
2416              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2417          if (CVal == 0 || Alignment > CVal) {
2418            int64_t CombinedOffset = CVal + GA->getOffset();
2419            Base = Base.getOperand(0);
2420            Offset = CurDAG->getTargetGlobalAddress(
2421                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2422                CombinedOffset, GA->getTargetFlags());
2423            return true;
2424          }
2425        }
2426      }
2427
2428      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2429        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2430      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2431      return true;
2432    }
2433  }
2434
2435  // Handle ADD with large immediates.
2436  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2437    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2438    assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2439           "simm12 not already handled?");
2440
2441    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2442    // an ADDI for part of the offset and fold the rest into the load/store.
2443    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2444    if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2445      int64_t Adj = CVal < 0 ? -2048 : 2047;
2446      Base = SDValue(
2447          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2448                                 CurDAG->getTargetConstant(Adj, DL, VT)),
2449          0);
2450      Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2451      return true;
2452    }
2453
2454    // For larger immediates, we might be able to save one instruction from
2455    // constant materialization by folding the Lo12 bits of the immediate into
2456    // the address. We should only do this if the ADD is only used by loads and
2457    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2458    // separately with the full materialized immediate creating extra
2459    // instructions.
2460    if (isWorthFoldingAdd(Addr) &&
2461        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2462                           Offset)) {
2463      // Insert an ADD instruction with the materialized Hi52 bits.
2464      Base = SDValue(
2465          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2466          0);
2467      return true;
2468    }
2469  }
2470
2471  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2472    return true;
2473
2474  Base = Addr;
2475  Offset = CurDAG->getTargetConstant(0, DL, VT);
2476  return true;
2477}
2478
2479/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2480/// Offset shoule be all zeros.
2481bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2482                                                 SDValue &Offset) {
2483  if (SelectAddrFrameIndex(Addr, Base, Offset))
2484    return true;
2485
2486  SDLoc DL(Addr);
2487  MVT VT = Addr.getSimpleValueType();
2488
2489  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2490    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2491    if (isInt<12>(CVal)) {
2492      Base = Addr.getOperand(0);
2493
2494      // Early-out if not a valid offset.
2495      if ((CVal & 0b11111) != 0) {
2496        Base = Addr;
2497        Offset = CurDAG->getTargetConstant(0, DL, VT);
2498        return true;
2499      }
2500
2501      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2502        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2503      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2504      return true;
2505    }
2506  }
2507
2508  // Handle ADD with large immediates.
2509  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2510    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2511    assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2512           "simm12 not already handled?");
2513
2514    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2515    // one instruction by folding adjustment (-2048 or 2016) into the address.
2516    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2517      int64_t Adj = CVal < 0 ? -2048 : 2016;
2518      int64_t AdjustedOffset = CVal - Adj;
2519      Base = SDValue(CurDAG->getMachineNode(
2520                         RISCV::ADDI, DL, VT, Addr.getOperand(0),
2521                         CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2522                     0);
2523      Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2524      return true;
2525    }
2526
2527    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2528                           Offset, true)) {
2529      // Insert an ADD instruction with the materialized Hi52 bits.
2530      Base = SDValue(
2531          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2532          0);
2533      return true;
2534    }
2535  }
2536
2537  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2538    return true;
2539
2540  Base = Addr;
2541  Offset = CurDAG->getTargetConstant(0, DL, VT);
2542  return true;
2543}
2544
2545bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2546                                        SDValue &ShAmt) {
2547  ShAmt = N;
2548
2549  // Peek through zext.
2550  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2551    ShAmt = ShAmt.getOperand(0);
2552
2553  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2554  // amount. If there is an AND on the shift amount, we can bypass it if it
2555  // doesn't affect any of those bits.
2556  if (ShAmt.getOpcode() == ISD::AND &&
2557      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2558    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2559
2560    // Since the max shift amount is a power of 2 we can subtract 1 to make a
2561    // mask that covers the bits needed to represent all shift amounts.
2562    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2563    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2564
2565    if (ShMask.isSubsetOf(AndMask)) {
2566      ShAmt = ShAmt.getOperand(0);
2567    } else {
2568      // SimplifyDemandedBits may have optimized the mask so try restoring any
2569      // bits that are known zero.
2570      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2571      if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2572        return true;
2573      ShAmt = ShAmt.getOperand(0);
2574    }
2575  }
2576
2577  if (ShAmt.getOpcode() == ISD::ADD &&
2578      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2579    uint64_t Imm = ShAmt.getConstantOperandVal(1);
2580    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2581    // to avoid the ADD.
2582    if (Imm != 0 && Imm % ShiftWidth == 0) {
2583      ShAmt = ShAmt.getOperand(0);
2584      return true;
2585    }
2586  } else if (ShAmt.getOpcode() == ISD::SUB &&
2587             isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2588    uint64_t Imm = ShAmt.getConstantOperandVal(0);
2589    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2590    // generate a NEG instead of a SUB of a constant.
2591    if (Imm != 0 && Imm % ShiftWidth == 0) {
2592      SDLoc DL(ShAmt);
2593      EVT VT = ShAmt.getValueType();
2594      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2595      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2596      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2597                                                  ShAmt.getOperand(1));
2598      ShAmt = SDValue(Neg, 0);
2599      return true;
2600    }
2601    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2602    // to generate a NOT instead of a SUB of a constant.
2603    if (Imm % ShiftWidth == ShiftWidth - 1) {
2604      SDLoc DL(ShAmt);
2605      EVT VT = ShAmt.getValueType();
2606      MachineSDNode *Not =
2607          CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2608                                 CurDAG->getTargetConstant(-1, DL, VT));
2609      ShAmt = SDValue(Not, 0);
2610      return true;
2611    }
2612  }
2613
2614  return true;
2615}
2616
2617/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2618/// check for equality with 0. This function emits instructions that convert the
2619/// seteq/setne into something that can be compared with 0.
2620/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2621/// ISD::SETNE).
2622bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2623                                    SDValue &Val) {
2624  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2625         "Unexpected condition code!");
2626
2627  // We're looking for a setcc.
2628  if (N->getOpcode() != ISD::SETCC)
2629    return false;
2630
2631  // Must be an equality comparison.
2632  ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2633  if (CCVal != ExpectedCCVal)
2634    return false;
2635
2636  SDValue LHS = N->getOperand(0);
2637  SDValue RHS = N->getOperand(1);
2638
2639  if (!LHS.getValueType().isScalarInteger())
2640    return false;
2641
2642  // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2643  if (isNullConstant(RHS)) {
2644    Val = LHS;
2645    return true;
2646  }
2647
2648  SDLoc DL(N);
2649
2650  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2651    int64_t CVal = C->getSExtValue();
2652    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2653    // non-zero otherwise.
2654    if (CVal == -2048) {
2655      Val =
2656          SDValue(CurDAG->getMachineNode(
2657                      RISCV::XORI, DL, N->getValueType(0), LHS,
2658                      CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2659                  0);
2660      return true;
2661    }
2662    // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2663    // LHS is equal to the RHS and non-zero otherwise.
2664    if (isInt<12>(CVal) || CVal == 2048) {
2665      Val =
2666          SDValue(CurDAG->getMachineNode(
2667                      RISCV::ADDI, DL, N->getValueType(0), LHS,
2668                      CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2669                  0);
2670      return true;
2671    }
2672  }
2673
2674  // If nothing else we can XOR the LHS and RHS to produce zero if they are
2675  // equal and a non-zero value if they aren't.
2676  Val = SDValue(
2677      CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2678  return true;
2679}
2680
2681bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2682  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2683      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2684    Val = N.getOperand(0);
2685    return true;
2686  }
2687
2688  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2689    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2690      return N;
2691
2692    SDValue N0 = N.getOperand(0);
2693    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2694        N.getConstantOperandVal(1) == ShiftAmt &&
2695        N0.getConstantOperandVal(1) == ShiftAmt)
2696      return N0.getOperand(0);
2697
2698    return N;
2699  };
2700
2701  MVT VT = N.getSimpleValueType();
2702  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2703    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2704    return true;
2705  }
2706
2707  return false;
2708}
2709
2710bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2711  if (N.getOpcode() == ISD::AND) {
2712    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2713    if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2714      Val = N.getOperand(0);
2715      return true;
2716    }
2717  }
2718  MVT VT = N.getSimpleValueType();
2719  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2720  if (CurDAG->MaskedValueIsZero(N, Mask)) {
2721    Val = N;
2722    return true;
2723  }
2724
2725  return false;
2726}
2727
2728/// Look for various patterns that can be done with a SHL that can be folded
2729/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2730/// SHXADD we are trying to match.
2731bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2732                                       SDValue &Val) {
2733  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2734    SDValue N0 = N.getOperand(0);
2735
2736    bool LeftShift = N0.getOpcode() == ISD::SHL;
2737    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2738        isa<ConstantSDNode>(N0.getOperand(1))) {
2739      uint64_t Mask = N.getConstantOperandVal(1);
2740      unsigned C2 = N0.getConstantOperandVal(1);
2741
2742      unsigned XLen = Subtarget->getXLen();
2743      if (LeftShift)
2744        Mask &= maskTrailingZeros<uint64_t>(C2);
2745      else
2746        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2747
2748      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2749      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2750      // followed by a SHXADD with c3 for the X amount.
2751      if (isShiftedMask_64(Mask)) {
2752        unsigned Leading = XLen - llvm::bit_width(Mask);
2753        unsigned Trailing = llvm::countr_zero(Mask);
2754        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2755          SDLoc DL(N);
2756          EVT VT = N.getValueType();
2757          Val = SDValue(CurDAG->getMachineNode(
2758                            RISCV::SRLI, DL, VT, N0.getOperand(0),
2759                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2760                        0);
2761          return true;
2762        }
2763        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2764        // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2765        // followed by a SHXADD using c3 for the X amount.
2766        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2767          SDLoc DL(N);
2768          EVT VT = N.getValueType();
2769          Val = SDValue(
2770              CurDAG->getMachineNode(
2771                  RISCV::SRLI, DL, VT, N0.getOperand(0),
2772                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2773              0);
2774          return true;
2775        }
2776      }
2777    }
2778  }
2779
2780  bool LeftShift = N.getOpcode() == ISD::SHL;
2781  if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2782      isa<ConstantSDNode>(N.getOperand(1))) {
2783    SDValue N0 = N.getOperand(0);
2784    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2785        isa<ConstantSDNode>(N0.getOperand(1))) {
2786      uint64_t Mask = N0.getConstantOperandVal(1);
2787      if (isShiftedMask_64(Mask)) {
2788        unsigned C1 = N.getConstantOperandVal(1);
2789        unsigned XLen = Subtarget->getXLen();
2790        unsigned Leading = XLen - llvm::bit_width(Mask);
2791        unsigned Trailing = llvm::countr_zero(Mask);
2792        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2793        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2794        if (LeftShift && Leading == 32 && Trailing > 0 &&
2795            (Trailing + C1) == ShAmt) {
2796          SDLoc DL(N);
2797          EVT VT = N.getValueType();
2798          Val = SDValue(CurDAG->getMachineNode(
2799                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
2800                            CurDAG->getTargetConstant(Trailing, DL, VT)),
2801                        0);
2802          return true;
2803        }
2804        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2805        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2806        if (!LeftShift && Leading == 32 && Trailing > C1 &&
2807            (Trailing - C1) == ShAmt) {
2808          SDLoc DL(N);
2809          EVT VT = N.getValueType();
2810          Val = SDValue(CurDAG->getMachineNode(
2811                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
2812                            CurDAG->getTargetConstant(Trailing, DL, VT)),
2813                        0);
2814          return true;
2815        }
2816      }
2817    }
2818  }
2819
2820  return false;
2821}
2822
2823/// Look for various patterns that can be done with a SHL that can be folded
2824/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2825/// SHXADD_UW we are trying to match.
2826bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2827                                          SDValue &Val) {
2828  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2829      N.hasOneUse()) {
2830    SDValue N0 = N.getOperand(0);
2831    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2832        N0.hasOneUse()) {
2833      uint64_t Mask = N.getConstantOperandVal(1);
2834      unsigned C2 = N0.getConstantOperandVal(1);
2835
2836      Mask &= maskTrailingZeros<uint64_t>(C2);
2837
2838      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2839      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2840      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2841      if (isShiftedMask_64(Mask)) {
2842        unsigned Leading = llvm::countl_zero(Mask);
2843        unsigned Trailing = llvm::countr_zero(Mask);
2844        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2845          SDLoc DL(N);
2846          EVT VT = N.getValueType();
2847          Val = SDValue(CurDAG->getMachineNode(
2848                            RISCV::SLLI, DL, VT, N0.getOperand(0),
2849                            CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2850                        0);
2851          return true;
2852        }
2853      }
2854    }
2855  }
2856
2857  return false;
2858}
2859
2860static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2861                                        unsigned Bits,
2862                                        const TargetInstrInfo *TII) {
2863  unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2864
2865  if (!MCOpcode)
2866    return false;
2867
2868  const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2869  const uint64_t TSFlags = MCID.TSFlags;
2870  if (!RISCVII::hasSEWOp(TSFlags))
2871    return false;
2872  assert(RISCVII::hasVLOp(TSFlags));
2873
2874  bool HasGlueOp = User->getGluedNode() != nullptr;
2875  unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
2876  bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
2877  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
2878  unsigned VLIdx =
2879      User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
2880  const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
2881
2882  if (UserOpNo == VLIdx)
2883    return false;
2884
2885  auto NumDemandedBits =
2886      RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
2887  return NumDemandedBits && Bits >= *NumDemandedBits;
2888}
2889
2890// Return true if all users of this SDNode* only consume the lower \p Bits.
2891// This can be used to form W instructions for add/sub/mul/shl even when the
2892// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2893// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2894// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2895// the add/sub/mul/shl to become non-W instructions. By checking the users we
2896// may be able to use a W instruction and CSE with the other instruction if
2897// this has happened. We could try to detect that the CSE opportunity exists
2898// before doing this, but that would be more complicated.
2899bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
2900                                        const unsigned Depth) const {
2901  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2902          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2903          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
2904          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
2905          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2906          isa<ConstantSDNode>(Node) || Depth != 0) &&
2907         "Unexpected opcode");
2908
2909  if (Depth >= SelectionDAG::MaxRecursionDepth)
2910    return false;
2911
2912  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
2913  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
2914  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
2915    return false;
2916
2917  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2918    SDNode *User = *UI;
2919    // Users of this node should have already been instruction selected
2920    if (!User->isMachineOpcode())
2921      return false;
2922
2923    // TODO: Add more opcodes?
2924    switch (User->getMachineOpcode()) {
2925    default:
2926      if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
2927        break;
2928      return false;
2929    case RISCV::ADDW:
2930    case RISCV::ADDIW:
2931    case RISCV::SUBW:
2932    case RISCV::MULW:
2933    case RISCV::SLLW:
2934    case RISCV::SLLIW:
2935    case RISCV::SRAW:
2936    case RISCV::SRAIW:
2937    case RISCV::SRLW:
2938    case RISCV::SRLIW:
2939    case RISCV::DIVW:
2940    case RISCV::DIVUW:
2941    case RISCV::REMW:
2942    case RISCV::REMUW:
2943    case RISCV::ROLW:
2944    case RISCV::RORW:
2945    case RISCV::RORIW:
2946    case RISCV::CLZW:
2947    case RISCV::CTZW:
2948    case RISCV::CPOPW:
2949    case RISCV::SLLI_UW:
2950    case RISCV::FMV_W_X:
2951    case RISCV::FCVT_H_W:
2952    case RISCV::FCVT_H_WU:
2953    case RISCV::FCVT_S_W:
2954    case RISCV::FCVT_S_WU:
2955    case RISCV::FCVT_D_W:
2956    case RISCV::FCVT_D_WU:
2957    case RISCV::TH_REVW:
2958    case RISCV::TH_SRRIW:
2959      if (Bits < 32)
2960        return false;
2961      break;
2962    case RISCV::SLL:
2963    case RISCV::SRA:
2964    case RISCV::SRL:
2965    case RISCV::ROL:
2966    case RISCV::ROR:
2967    case RISCV::BSET:
2968    case RISCV::BCLR:
2969    case RISCV::BINV:
2970      // Shift amount operands only use log2(Xlen) bits.
2971      if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
2972        return false;
2973      break;
2974    case RISCV::SLLI:
2975      // SLLI only uses the lower (XLen - ShAmt) bits.
2976      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
2977        return false;
2978      break;
2979    case RISCV::ANDI:
2980      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
2981        break;
2982      goto RecCheck;
2983    case RISCV::ORI: {
2984      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
2985      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
2986        break;
2987      [[fallthrough]];
2988    }
2989    case RISCV::AND:
2990    case RISCV::OR:
2991    case RISCV::XOR:
2992    case RISCV::XORI:
2993    case RISCV::ANDN:
2994    case RISCV::ORN:
2995    case RISCV::XNOR:
2996    case RISCV::SH1ADD:
2997    case RISCV::SH2ADD:
2998    case RISCV::SH3ADD:
2999    RecCheck:
3000      if (hasAllNBitUsers(User, Bits, Depth + 1))
3001        break;
3002      return false;
3003    case RISCV::SRLI: {
3004      unsigned ShAmt = User->getConstantOperandVal(1);
3005      // If we are shifting right by less than Bits, and users don't demand any
3006      // bits that were shifted into [Bits-1:0], then we can consider this as an
3007      // N-Bit user.
3008      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3009        break;
3010      return false;
3011    }
3012    case RISCV::SEXT_B:
3013    case RISCV::PACKH:
3014      if (Bits < 8)
3015        return false;
3016      break;
3017    case RISCV::SEXT_H:
3018    case RISCV::FMV_H_X:
3019    case RISCV::ZEXT_H_RV32:
3020    case RISCV::ZEXT_H_RV64:
3021    case RISCV::PACKW:
3022      if (Bits < 16)
3023        return false;
3024      break;
3025    case RISCV::PACK:
3026      if (Bits < (Subtarget->getXLen() / 2))
3027        return false;
3028      break;
3029    case RISCV::ADD_UW:
3030    case RISCV::SH1ADD_UW:
3031    case RISCV::SH2ADD_UW:
3032    case RISCV::SH3ADD_UW:
3033      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3034      // 32 bits.
3035      if (UI.getOperandNo() != 0 || Bits < 32)
3036        return false;
3037      break;
3038    case RISCV::SB:
3039      if (UI.getOperandNo() != 0 || Bits < 8)
3040        return false;
3041      break;
3042    case RISCV::SH:
3043      if (UI.getOperandNo() != 0 || Bits < 16)
3044        return false;
3045      break;
3046    case RISCV::SW:
3047      if (UI.getOperandNo() != 0 || Bits < 32)
3048        return false;
3049      break;
3050    }
3051  }
3052
3053  return true;
3054}
3055
3056// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3057bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3058                                        SDValue &Shl2) {
3059  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3060    int64_t Offset = C->getSExtValue();
3061    int64_t Shift;
3062    for (Shift = 0; Shift < 4; Shift++)
3063      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3064        break;
3065
3066    // Constant cannot be encoded.
3067    if (Shift == 4)
3068      return false;
3069
3070    EVT Ty = N->getValueType(0);
3071    Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3072    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3073    return true;
3074  }
3075
3076  return false;
3077}
3078
3079// Select VL as a 5 bit immediate or a value that will become a register. This
3080// allows us to choose betwen VSETIVLI or VSETVLI later.
3081bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3082  auto *C = dyn_cast<ConstantSDNode>(N);
3083  if (C && isUInt<5>(C->getZExtValue())) {
3084    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3085                                   N->getValueType(0));
3086  } else if (C && C->isAllOnes()) {
3087    // Treat all ones as VLMax.
3088    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3089                                   N->getValueType(0));
3090  } else if (isa<RegisterSDNode>(N) &&
3091             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3092    // All our VL operands use an operand that allows GPRNoX0 or an immediate
3093    // as the register class. Convert X0 to a special immediate to pass the
3094    // MachineVerifier. This is recognized specially by the vsetvli insertion
3095    // pass.
3096    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3097                                   N->getValueType(0));
3098  } else {
3099    VL = N;
3100  }
3101
3102  return true;
3103}
3104
3105static SDValue findVSplat(SDValue N) {
3106  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3107    if (!N.getOperand(0).isUndef())
3108      return SDValue();
3109    N = N.getOperand(1);
3110  }
3111  SDValue Splat = N;
3112  if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3113       Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3114      !Splat.getOperand(0).isUndef())
3115    return SDValue();
3116  assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3117  return Splat;
3118}
3119
3120bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3121  SDValue Splat = findVSplat(N);
3122  if (!Splat)
3123    return false;
3124
3125  SplatVal = Splat.getOperand(1);
3126  return true;
3127}
3128
3129static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3130                                  SelectionDAG &DAG,
3131                                  const RISCVSubtarget &Subtarget,
3132                                  std::function<bool(int64_t)> ValidateImm) {
3133  SDValue Splat = findVSplat(N);
3134  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3135    return false;
3136
3137  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3138  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3139         "Unexpected splat operand type");
3140
3141  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3142  // type is wider than the resulting vector element type: an implicit
3143  // truncation first takes place. Therefore, perform a manual
3144  // truncation/sign-extension in order to ignore any truncated bits and catch
3145  // any zero-extended immediate.
3146  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3147  // sign-extending to (XLenVT -1).
3148  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3149
3150  int64_t SplatImm = SplatConst.getSExtValue();
3151
3152  if (!ValidateImm(SplatImm))
3153    return false;
3154
3155  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3156  return true;
3157}
3158
3159bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3160  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3161                               [](int64_t Imm) { return isInt<5>(Imm); });
3162}
3163
3164bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3165  return selectVSplatImmHelper(
3166      N, SplatVal, *CurDAG, *Subtarget,
3167      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3168}
3169
3170bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3171                                                      SDValue &SplatVal) {
3172  return selectVSplatImmHelper(
3173      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3174        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3175      });
3176}
3177
3178bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3179                                         SDValue &SplatVal) {
3180  return selectVSplatImmHelper(
3181      N, SplatVal, *CurDAG, *Subtarget,
3182      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3183}
3184
3185bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3186  // Truncates are custom lowered during legalization.
3187  auto IsTrunc = [this](SDValue N) {
3188    if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
3189      return false;
3190    SDValue VL;
3191    selectVLOp(N->getOperand(2), VL);
3192    // Any vmset_vl is ok, since any bits past VL are undefined and we can
3193    // assume they are set.
3194    return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
3195           isa<ConstantSDNode>(VL) &&
3196           cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
3197  };
3198
3199  // We can have multiple nested truncates, so unravel them all if needed.
3200  while (N->getOpcode() == ISD::SIGN_EXTEND ||
3201         N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
3202    if (!N.hasOneUse() ||
3203        N.getValueType().getSizeInBits().getKnownMinValue() < 8)
3204      return false;
3205    N = N->getOperand(0);
3206  }
3207
3208  return selectVSplat(N, SplatVal);
3209}
3210
3211bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3212  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3213  if (!CFP)
3214    return false;
3215  const APFloat &APF = CFP->getValueAPF();
3216  // td can handle +0.0 already.
3217  if (APF.isPosZero())
3218    return false;
3219
3220  MVT VT = CFP->getSimpleValueType(0);
3221
3222  // Even if this FPImm requires an additional FNEG (i.e. the second element of
3223  // the returned pair is true) we still prefer FLI + FNEG over immediate
3224  // materialization as the latter might generate a longer instruction sequence.
3225  if (static_cast<const RISCVTargetLowering *>(TLI)
3226          ->getLegalZfaFPImm(APF, VT)
3227          .first >= 0)
3228    return false;
3229
3230  MVT XLenVT = Subtarget->getXLenVT();
3231  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3232    assert(APF.isNegZero() && "Unexpected constant.");
3233    return false;
3234  }
3235  SDLoc DL(N);
3236  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3237                  *Subtarget);
3238  return true;
3239}
3240
3241bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3242                                       SDValue &Imm) {
3243  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3244    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3245
3246    if (!isInt<5>(ImmVal))
3247      return false;
3248
3249    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3250    return true;
3251  }
3252
3253  return false;
3254}
3255
3256// Try to remove sext.w if the input is a W instruction or can be made into
3257// a W instruction cheaply.
3258bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3259  // Look for the sext.w pattern, addiw rd, rs1, 0.
3260  if (N->getMachineOpcode() != RISCV::ADDIW ||
3261      !isNullConstant(N->getOperand(1)))
3262    return false;
3263
3264  SDValue N0 = N->getOperand(0);
3265  if (!N0.isMachineOpcode())
3266    return false;
3267
3268  switch (N0.getMachineOpcode()) {
3269  default:
3270    break;
3271  case RISCV::ADD:
3272  case RISCV::ADDI:
3273  case RISCV::SUB:
3274  case RISCV::MUL:
3275  case RISCV::SLLI: {
3276    // Convert sext.w+add/sub/mul to their W instructions. This will create
3277    // a new independent instruction. This improves latency.
3278    unsigned Opc;
3279    switch (N0.getMachineOpcode()) {
3280    default:
3281      llvm_unreachable("Unexpected opcode!");
3282    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
3283    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3284    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
3285    case RISCV::MUL:  Opc = RISCV::MULW;  break;
3286    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3287    }
3288
3289    SDValue N00 = N0.getOperand(0);
3290    SDValue N01 = N0.getOperand(1);
3291
3292    // Shift amount needs to be uimm5.
3293    if (N0.getMachineOpcode() == RISCV::SLLI &&
3294        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3295      break;
3296
3297    SDNode *Result =
3298        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3299                               N00, N01);
3300    ReplaceUses(N, Result);
3301    return true;
3302  }
3303  case RISCV::ADDW:
3304  case RISCV::ADDIW:
3305  case RISCV::SUBW:
3306  case RISCV::MULW:
3307  case RISCV::SLLIW:
3308  case RISCV::PACKW:
3309  case RISCV::TH_MULAW:
3310  case RISCV::TH_MULAH:
3311  case RISCV::TH_MULSW:
3312  case RISCV::TH_MULSH:
3313    if (N0.getValueType() == MVT::i32)
3314      break;
3315
3316    // Result is already sign extended just remove the sext.w.
3317    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3318    ReplaceUses(N, N0.getNode());
3319    return true;
3320  }
3321
3322  return false;
3323}
3324
3325static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3326  // Check that we're using V0 as a mask register.
3327  if (!isa<RegisterSDNode>(MaskOp) ||
3328      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3329    return false;
3330
3331  // The glued user defines V0.
3332  const auto *Glued = GlueOp.getNode();
3333
3334  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3335    return false;
3336
3337  // Check that we're defining V0 as a mask register.
3338  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3339      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3340    return false;
3341
3342  // Check the instruction defining V0; it needs to be a VMSET pseudo.
3343  SDValue MaskSetter = Glued->getOperand(2);
3344
3345  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3346  // from an extract_subvector or insert_subvector.
3347  if (MaskSetter->isMachineOpcode() &&
3348      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3349    MaskSetter = MaskSetter->getOperand(0);
3350
3351  const auto IsVMSet = [](unsigned Opc) {
3352    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3353           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3354           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3355           Opc == RISCV::PseudoVMSET_M_B8;
3356  };
3357
3358  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3359  // undefined behaviour if it's the wrong bitwidth, so we could choose to
3360  // assume that it's all-ones? Same applies to its VL.
3361  return MaskSetter->isMachineOpcode() &&
3362         IsVMSet(MaskSetter.getMachineOpcode());
3363}
3364
3365// Return true if we can make sure mask of N is all-ones mask.
3366static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3367  return usesAllOnesMask(N->getOperand(MaskOpIdx),
3368                         N->getOperand(N->getNumOperands() - 1));
3369}
3370
3371static bool isImplicitDef(SDValue V) {
3372  return V.isMachineOpcode() &&
3373         V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3374}
3375
3376// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3377// corresponding "unmasked" pseudo versions. The mask we're interested in will
3378// take the form of a V0 physical register operand, with a glued
3379// register-setting instruction.
3380bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3381  const RISCV::RISCVMaskedPseudoInfo *I =
3382      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3383  if (!I)
3384    return false;
3385
3386  unsigned MaskOpIdx = I->MaskOpIdx;
3387  if (!usesAllOnesMask(N, MaskOpIdx))
3388    return false;
3389
3390  // There are two classes of pseudos in the table - compares and
3391  // everything else.  See the comment on RISCVMaskedPseudo for details.
3392  const unsigned Opc = I->UnmaskedPseudo;
3393  const MCInstrDesc &MCID = TII->get(Opc);
3394  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3395#ifndef NDEBUG
3396  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3397  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3398         RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3399         "Masked and unmasked pseudos are inconsistent");
3400  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3401  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3402#endif
3403
3404  SmallVector<SDValue, 8> Ops;
3405  // Skip the merge operand at index 0 if !UseTUPseudo.
3406  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3407    // Skip the mask, and the Glue.
3408    SDValue Op = N->getOperand(I);
3409    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3410      continue;
3411    Ops.push_back(Op);
3412  }
3413
3414  // Transitively apply any node glued to our new node.
3415  const auto *Glued = N->getGluedNode();
3416  if (auto *TGlued = Glued->getGluedNode())
3417    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3418
3419  MachineSDNode *Result =
3420      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3421
3422  if (!N->memoperands_empty())
3423    CurDAG->setNodeMemRefs(Result, N->memoperands());
3424
3425  Result->setFlags(N->getFlags());
3426  ReplaceUses(N, Result);
3427
3428  return true;
3429}
3430
3431static bool IsVMerge(SDNode *N) {
3432  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3433}
3434
3435static bool IsVMv(SDNode *N) {
3436  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3437}
3438
3439static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3440  switch (LMUL) {
3441  case RISCVII::LMUL_F8:
3442    return RISCV::PseudoVMSET_M_B1;
3443  case RISCVII::LMUL_F4:
3444    return RISCV::PseudoVMSET_M_B2;
3445  case RISCVII::LMUL_F2:
3446    return RISCV::PseudoVMSET_M_B4;
3447  case RISCVII::LMUL_1:
3448    return RISCV::PseudoVMSET_M_B8;
3449  case RISCVII::LMUL_2:
3450    return RISCV::PseudoVMSET_M_B16;
3451  case RISCVII::LMUL_4:
3452    return RISCV::PseudoVMSET_M_B32;
3453  case RISCVII::LMUL_8:
3454    return RISCV::PseudoVMSET_M_B64;
3455  case RISCVII::LMUL_RESERVED:
3456    llvm_unreachable("Unexpected LMUL");
3457  }
3458  llvm_unreachable("Unknown VLMUL enum");
3459}
3460
3461// Try to fold away VMERGE_VVM instructions. We handle these cases:
3462// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
3463//  folds to a masked TU instruction. VMERGE_VVM must have have merge operand
3464//  same as false operand.
3465// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a
3466//  masked TA instruction.
3467// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to
3468//  masked TU instruction. Both instructions must have the same merge operand.
3469//  VMERGE_VVM must have have merge operand same as false operand.
3470// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
3471// not the pseudo name.  That is, a TA VMERGE_VVM can be either the _TU pseudo
3472// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
3473// form.
3474bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3475  SDValue Merge, False, True, VL, Mask, Glue;
3476  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3477  if (IsVMv(N)) {
3478    Merge = N->getOperand(0);
3479    False = N->getOperand(0);
3480    True = N->getOperand(1);
3481    VL = N->getOperand(2);
3482    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3483    // mask later below.
3484  } else {
3485    assert(IsVMerge(N));
3486    Merge = N->getOperand(0);
3487    False = N->getOperand(1);
3488    True = N->getOperand(2);
3489    Mask = N->getOperand(3);
3490    VL = N->getOperand(4);
3491    // We always have a glue node for the mask at v0.
3492    Glue = N->getOperand(N->getNumOperands() - 1);
3493  }
3494  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3495  assert(!Glue || Glue.getValueType() == MVT::Glue);
3496
3497  // We require that either merge and false are the same, or that merge
3498  // is undefined.
3499  if (Merge != False && !isImplicitDef(Merge))
3500    return false;
3501
3502  assert(True.getResNo() == 0 &&
3503         "Expect True is the first output of an instruction.");
3504
3505  // Need N is the exactly one using True.
3506  if (!True.hasOneUse())
3507    return false;
3508
3509  if (!True.isMachineOpcode())
3510    return false;
3511
3512  unsigned TrueOpc = True.getMachineOpcode();
3513  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3514  uint64_t TrueTSFlags = TrueMCID.TSFlags;
3515  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3516
3517  bool IsMasked = false;
3518  const RISCV::RISCVMaskedPseudoInfo *Info =
3519      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3520  if (!Info && HasTiedDest) {
3521    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3522    IsMasked = true;
3523  }
3524
3525  if (!Info)
3526    return false;
3527
3528  // When Mask is not a true mask, this transformation is illegal for some
3529  // operations whose results are affected by mask, like viota.m.
3530  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3531    return false;
3532
3533  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3534    // The vmerge instruction must be TU.
3535    // FIXME: This could be relaxed, but we need to handle the policy for the
3536    // resulting op correctly.
3537    if (isImplicitDef(Merge))
3538      return false;
3539    SDValue MergeOpTrue = True->getOperand(0);
3540    // Both the vmerge instruction and the True instruction must have the same
3541    // merge operand.
3542    if (False != MergeOpTrue)
3543      return false;
3544  }
3545
3546  if (IsMasked) {
3547    assert(HasTiedDest && "Expected tied dest");
3548    // The vmerge instruction must be TU.
3549    if (isImplicitDef(Merge))
3550      return false;
3551    // The vmerge instruction must have an all 1s mask since we're going to keep
3552    // the mask from the True instruction.
3553    // FIXME: Support mask agnostic True instruction which would have an
3554    // undef merge operand.
3555    if (Mask && !usesAllOnesMask(Mask, Glue))
3556      return false;
3557  }
3558
3559  // Skip if True has side effect.
3560  // TODO: Support vleff and vlsegff.
3561  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3562    return false;
3563
3564  // The last operand of a masked instruction may be glued.
3565  bool HasGlueOp = True->getGluedNode() != nullptr;
3566
3567  // The chain operand may exist either before the glued operands or in the last
3568  // position.
3569  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3570  bool HasChainOp =
3571      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3572
3573  if (HasChainOp) {
3574    // Avoid creating cycles in the DAG. We must ensure that none of the other
3575    // operands depend on True through it's Chain.
3576    SmallVector<const SDNode *, 4> LoopWorklist;
3577    SmallPtrSet<const SDNode *, 16> Visited;
3578    LoopWorklist.push_back(False.getNode());
3579    if (Mask)
3580      LoopWorklist.push_back(Mask.getNode());
3581    LoopWorklist.push_back(VL.getNode());
3582    if (Glue)
3583      LoopWorklist.push_back(Glue.getNode());
3584    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3585      return false;
3586  }
3587
3588  // The vector policy operand may be present for masked intrinsics
3589  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3590  unsigned TrueVLIndex =
3591      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3592  SDValue TrueVL = True.getOperand(TrueVLIndex);
3593  SDValue SEW = True.getOperand(TrueVLIndex + 1);
3594
3595  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3596    if (LHS == RHS)
3597      return LHS;
3598    if (isAllOnesConstant(LHS))
3599      return RHS;
3600    if (isAllOnesConstant(RHS))
3601      return LHS;
3602    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3603    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3604    if (!CLHS || !CRHS)
3605      return SDValue();
3606    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3607  };
3608
3609  // Because N and True must have the same merge operand (or True's operand is
3610  // implicit_def), the "effective" body is the minimum of their VLs.
3611  SDValue OrigVL = VL;
3612  VL = GetMinVL(TrueVL, VL);
3613  if (!VL)
3614    return false;
3615
3616  // If we end up changing the VL or mask of True, then we need to make sure it
3617  // doesn't raise any observable fp exceptions, since changing the active
3618  // elements will affect how fflags is set.
3619  if (TrueVL != VL || !IsMasked)
3620    if (mayRaiseFPException(True.getNode()) &&
3621        !True->getFlags().hasNoFPExcept())
3622      return false;
3623
3624  SDLoc DL(N);
3625
3626  // From the preconditions we checked above, we know the mask and thus glue
3627  // for the result node will be taken from True.
3628  if (IsMasked) {
3629    Mask = True->getOperand(Info->MaskOpIdx);
3630    Glue = True->getOperand(True->getNumOperands() - 1);
3631    assert(Glue.getValueType() == MVT::Glue);
3632  }
3633  // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
3634  // an all-ones mask to use.
3635  else if (IsVMv(N)) {
3636    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3637    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3638    ElementCount EC = N->getValueType(0).getVectorElementCount();
3639    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3640
3641    SDValue AllOnesMask =
3642        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3643    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3644                                            RISCV::V0, AllOnesMask, SDValue());
3645    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3646    Glue = MaskCopy.getValue(1);
3647  }
3648
3649  unsigned MaskedOpc = Info->MaskedPseudo;
3650#ifndef NDEBUG
3651  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3652  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3653         "Expected instructions with mask have policy operand.");
3654  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3655                                         MCOI::TIED_TO) == 0 &&
3656         "Expected instructions with mask have a tied dest.");
3657#endif
3658
3659  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3660  // operand is undefined.
3661  //
3662  // However, if the VL became smaller than what the vmerge had originally, then
3663  // elements past VL that were previously in the vmerge's body will have moved
3664  // to the tail. In that case we always need to use tail undisturbed to
3665  // preserve them.
3666  bool MergeVLShrunk = VL != OrigVL;
3667  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3668                        ? RISCVII::TAIL_AGNOSTIC
3669                        : /*TUMU*/ 0;
3670  SDValue PolicyOp =
3671    CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3672
3673
3674  SmallVector<SDValue, 8> Ops;
3675  Ops.push_back(False);
3676
3677  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3678  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3679  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3680  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3681
3682  Ops.push_back(Mask);
3683
3684  // For unmasked "VOp" with rounding mode operand, that is interfaces like
3685  // (..., rm, vl) or (..., rm, vl, policy).
3686  // Its masked version is (..., vm, rm, vl, policy).
3687  // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3688  if (HasRoundingMode)
3689    Ops.push_back(True->getOperand(TrueVLIndex - 1));
3690
3691  Ops.append({VL, SEW, PolicyOp});
3692
3693  // Result node should have chain operand of True.
3694  if (HasChainOp)
3695    Ops.push_back(True.getOperand(TrueChainOpIdx));
3696
3697  // Add the glue for the CopyToReg of mask->v0.
3698  Ops.push_back(Glue);
3699
3700  MachineSDNode *Result =
3701      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3702  Result->setFlags(True->getFlags());
3703
3704  if (!cast<MachineSDNode>(True)->memoperands_empty())
3705    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3706
3707  // Replace vmerge.vvm node by Result.
3708  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3709
3710  // Replace another value of True. E.g. chain and VL.
3711  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3712    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3713
3714  return true;
3715}
3716
3717bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3718  bool MadeChange = false;
3719  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3720
3721  while (Position != CurDAG->allnodes_begin()) {
3722    SDNode *N = &*--Position;
3723    if (N->use_empty() || !N->isMachineOpcode())
3724      continue;
3725
3726    if (IsVMerge(N) || IsVMv(N))
3727      MadeChange |= performCombineVMergeAndVOps(N);
3728  }
3729  return MadeChange;
3730}
3731
3732/// If our passthru is an implicit_def, use noreg instead.  This side
3733/// steps issues with MachineCSE not being able to CSE expressions with
3734/// IMPLICIT_DEF operands while preserving the semantic intent. See
3735/// pr64282 for context. Note that this transform is the last one
3736/// performed at ISEL DAG to DAG.
3737bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3738  bool MadeChange = false;
3739  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3740
3741  while (Position != CurDAG->allnodes_begin()) {
3742    SDNode *N = &*--Position;
3743    if (N->use_empty() || !N->isMachineOpcode())
3744      continue;
3745
3746    const unsigned Opc = N->getMachineOpcode();
3747    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3748        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3749        !isImplicitDef(N->getOperand(0)))
3750      continue;
3751
3752    SmallVector<SDValue> Ops;
3753    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3754    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3755      SDValue Op = N->getOperand(I);
3756      Ops.push_back(Op);
3757    }
3758
3759    MachineSDNode *Result =
3760      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3761    Result->setFlags(N->getFlags());
3762    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3763    ReplaceUses(N, Result);
3764    MadeChange = true;
3765  }
3766  return MadeChange;
3767}
3768
3769
3770// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3771// for instruction scheduling.
3772FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3773                                       CodeGenOptLevel OptLevel) {
3774  return new RISCVDAGToDAGISel(TM, OptLevel);
3775}
3776
3777char RISCVDAGToDAGISel::ID = 0;
3778
3779INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
3780