//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
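      // For example, with ELEN=32 this gives MinElts = 64 / 32 = 2, so the
      // single-element nxv1 types are not made legal here and are handled by
      // type legalization instead.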
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
241 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 242 243 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 244 setOperationAction(ISD::BR_CC, XLenVT, Expand); 245 if (RV64LegalI32 && Subtarget.is64Bit()) 246 setOperationAction(ISD::BR_CC, MVT::i32, Expand); 247 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 248 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 249 if (RV64LegalI32 && Subtarget.is64Bit()) 250 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 251 252 setCondCodeAction(ISD::SETLE, XLenVT, Expand); 253 setCondCodeAction(ISD::SETGT, XLenVT, Custom); 254 setCondCodeAction(ISD::SETGE, XLenVT, Expand); 255 setCondCodeAction(ISD::SETULE, XLenVT, Expand); 256 setCondCodeAction(ISD::SETUGT, XLenVT, Custom); 257 setCondCodeAction(ISD::SETUGE, XLenVT, Expand); 258 259 if (RV64LegalI32 && Subtarget.is64Bit()) 260 setOperationAction(ISD::SETCC, MVT::i32, Promote); 261 262 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); 263 264 setOperationAction(ISD::VASTART, MVT::Other, Custom); 265 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); 266 267 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 268 269 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 270 271 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb()) 272 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); 273 274 if (Subtarget.is64Bit()) { 275 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 276 277 if (!RV64LegalI32) { 278 setOperationAction(ISD::LOAD, MVT::i32, Custom); 279 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, 280 MVT::i32, Custom); 281 setOperationAction(ISD::SADDO, MVT::i32, Custom); 282 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, 283 MVT::i32, Custom); 284 } 285 } else { 286 setLibcallName( 287 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, 288 nullptr); 289 setLibcallName(RTLIB::MULO_I64, nullptr); 290 } 291 292 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { 293 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); 294 if (RV64LegalI32 && Subtarget.is64Bit()) 295 setOperationAction(ISD::MUL, MVT::i32, Promote); 296 } else if (Subtarget.is64Bit()) { 297 setOperationAction(ISD::MUL, MVT::i128, Custom); 298 if (!RV64LegalI32) 299 setOperationAction(ISD::MUL, MVT::i32, Custom); 300 } else { 301 setOperationAction(ISD::MUL, MVT::i64, Custom); 302 } 303 304 if (!Subtarget.hasStdExtM()) { 305 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, 306 XLenVT, Expand); 307 if (RV64LegalI32 && Subtarget.is64Bit()) 308 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32, 309 Promote); 310 } else if (Subtarget.is64Bit()) { 311 if (!RV64LegalI32) 312 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, 313 {MVT::i8, MVT::i16, MVT::i32}, Custom); 314 } 315 316 if (RV64LegalI32 && Subtarget.is64Bit()) { 317 setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand); 318 setOperationAction( 319 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, 320 Expand); 321 } 322 323 setOperationAction( 324 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, 325 Expand); 326 327 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, 328 Custom); 329 330 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { 331 if (!RV64LegalI32 && Subtarget.is64Bit()) 332 setOperationAction({ISD::ROTL, 
ISD::ROTR}, MVT::i32, Custom); 333 } else if (Subtarget.hasVendorXTHeadBb()) { 334 if (Subtarget.is64Bit()) 335 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); 336 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom); 337 } else if (Subtarget.hasVendorXCVbitmanip()) { 338 setOperationAction(ISD::ROTL, XLenVT, Expand); 339 } else { 340 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); 341 if (RV64LegalI32 && Subtarget.is64Bit()) 342 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); 343 } 344 345 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 346 // pattern match it directly in isel. 347 setOperationAction(ISD::BSWAP, XLenVT, 348 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 349 Subtarget.hasVendorXTHeadBb()) 350 ? Legal 351 : Expand); 352 if (RV64LegalI32 && Subtarget.is64Bit()) 353 setOperationAction(ISD::BSWAP, MVT::i32, 354 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 355 Subtarget.hasVendorXTHeadBb()) 356 ? Promote 357 : Expand); 358 359 360 if (Subtarget.hasVendorXCVbitmanip()) { 361 setOperationAction(ISD::BITREVERSE, XLenVT, Legal); 362 } else { 363 // Zbkb can use rev8+brev8 to implement bitreverse. 364 setOperationAction(ISD::BITREVERSE, XLenVT, 365 Subtarget.hasStdExtZbkb() ? Custom : Expand); 366 } 367 368 if (Subtarget.hasStdExtZbb()) { 369 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, 370 Legal); 371 if (RV64LegalI32 && Subtarget.is64Bit()) 372 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32, 373 Promote); 374 375 if (Subtarget.is64Bit()) { 376 if (RV64LegalI32) 377 setOperationAction(ISD::CTTZ, MVT::i32, Legal); 378 else 379 setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); 380 } 381 } else if (!Subtarget.hasVendorXCVbitmanip()) { 382 setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand); 383 if (RV64LegalI32 && Subtarget.is64Bit()) 384 setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand); 385 } 386 387 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 388 Subtarget.hasVendorXCVbitmanip()) { 389 // We need the custom lowering to make sure that the resulting sequence 390 // for the 32bit case is efficient on 64bit targets. 391 if (Subtarget.is64Bit()) { 392 if (RV64LegalI32) { 393 setOperationAction(ISD::CTLZ, MVT::i32, 394 Subtarget.hasStdExtZbb() ? Legal : Promote); 395 if (!Subtarget.hasStdExtZbb()) 396 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 397 } else 398 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); 399 } 400 } else { 401 setOperationAction(ISD::CTLZ, XLenVT, Expand); 402 if (RV64LegalI32 && Subtarget.is64Bit()) 403 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 404 } 405 406 if (!RV64LegalI32 && Subtarget.is64Bit() && 407 !Subtarget.hasShortForwardBranchOpt()) 408 setOperationAction(ISD::ABS, MVT::i32, Custom); 409 410 // We can use PseudoCCSUB to implement ABS. 
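  // On short-forward-branch targets abs is then selected as a conditional
  // negate, roughly:
  //   bgez a0, 1f
  //   neg  a0, a0
  // 1: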
411 if (Subtarget.hasShortForwardBranchOpt()) 412 setOperationAction(ISD::ABS, XLenVT, Legal); 413 414 if (!Subtarget.hasVendorXTHeadCondMov()) 415 setOperationAction(ISD::SELECT, XLenVT, Custom); 416 417 if (RV64LegalI32 && Subtarget.is64Bit()) 418 setOperationAction(ISD::SELECT, MVT::i32, Promote); 419 420 static const unsigned FPLegalNodeTypes[] = { 421 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, 422 ISD::LLRINT, ISD::LROUND, ISD::LLROUND, 423 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, 424 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, 425 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 426 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; 427 428 static const ISD::CondCode FPCCToExpand[] = { 429 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 430 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 431 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 432 433 static const unsigned FPOpToExpand[] = { 434 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, 435 ISD::FREM}; 436 437 static const unsigned FPRndMode[] = { 438 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 439 ISD::FROUNDEVEN}; 440 441 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 442 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 443 444 static const unsigned ZfhminZfbfminPromoteOps[] = { 445 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, 446 ISD::FSUB, ISD::FMUL, ISD::FMA, 447 ISD::FDIV, ISD::FSQRT, ISD::FABS, 448 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, 449 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 450 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, 451 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, 452 ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 453 ISD::FROUNDEVEN, ISD::SELECT}; 454 455 if (Subtarget.hasStdExtZfbfmin()) { 456 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 457 setOperationAction(ISD::BITCAST, MVT::bf16, Custom); 458 setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom); 459 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); 460 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); 461 setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); 462 setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); 463 setOperationAction(ISD::BR_CC, MVT::bf16, Expand); 464 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); 465 setOperationAction(ISD::FREM, MVT::bf16, Promote); 466 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the 467 // DAGCombiner::visitFP_ROUND probably needs improvements first. 468 setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); 469 } 470 471 if (Subtarget.hasStdExtZfhminOrZhinxmin()) { 472 if (Subtarget.hasStdExtZfhOrZhinx()) { 473 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); 474 setOperationAction(FPRndMode, MVT::f16, 475 Subtarget.hasStdExtZfa() ? Legal : Custom); 476 setOperationAction(ISD::SELECT, MVT::f16, Custom); 477 setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); 478 } else { 479 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); 480 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, 481 ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, 482 MVT::f16, Legal); 483 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the 484 // DAGCombiner::visitFP_ROUND probably needs improvements first. 
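  // For now the generic expansion is used, which typically works on the raw
  // i16 bit pattern (clear the sign bit of the magnitude and OR in the sign
  // operand's sign bit).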
485 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); 486 } 487 488 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); 489 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); 490 setCondCodeAction(FPCCToExpand, MVT::f16, Expand); 491 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 492 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 493 494 setOperationAction(ISD::FNEARBYINT, MVT::f16, 495 Subtarget.hasStdExtZfa() ? Legal : Promote); 496 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, 497 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, 498 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, 499 ISD::FLOG10}, 500 MVT::f16, Promote); 501 502 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have 503 // complete support for all operations in LegalizeDAG. 504 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, 505 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, 506 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, 507 ISD::STRICT_FTRUNC}, 508 MVT::f16, Promote); 509 510 // We need to custom promote this. 511 if (Subtarget.is64Bit()) 512 setOperationAction(ISD::FPOWI, MVT::i32, Custom); 513 514 if (!Subtarget.hasStdExtZfa()) 515 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom); 516 } 517 518 if (Subtarget.hasStdExtFOrZfinx()) { 519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); 520 setOperationAction(FPRndMode, MVT::f32, 521 Subtarget.hasStdExtZfa() ? Legal : Custom); 522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand); 523 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 524 setOperationAction(ISD::SELECT, MVT::f32, Custom); 525 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 526 setOperationAction(FPOpToExpand, MVT::f32, Expand); 527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 528 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); 530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand); 531 setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); 532 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); 533 setOperationAction(ISD::FP_TO_BF16, MVT::f32, 534 Subtarget.isSoftFPABI() ? 
LibCall : Custom); 535 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); 536 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); 537 538 if (Subtarget.hasStdExtZfa()) 539 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); 540 else 541 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); 542 } 543 544 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit()) 545 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 546 547 if (Subtarget.hasStdExtDOrZdinx()) { 548 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); 549 550 if (Subtarget.hasStdExtZfa()) { 551 setOperationAction(FPRndMode, MVT::f64, Legal); 552 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); 553 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 554 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 555 } else { 556 if (Subtarget.is64Bit()) 557 setOperationAction(FPRndMode, MVT::f64, Custom); 558 559 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom); 560 } 561 562 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); 563 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); 564 setCondCodeAction(FPCCToExpand, MVT::f64, Expand); 565 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 566 setOperationAction(ISD::SELECT, MVT::f64, Custom); 567 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 568 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 569 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 570 setOperationAction(FPOpToExpand, MVT::f64, Expand); 571 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 572 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 573 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 574 setTruncStoreAction(MVT::f64, MVT::bf16, Expand); 575 setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); 576 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); 577 setOperationAction(ISD::FP_TO_BF16, MVT::f64, 578 Subtarget.isSoftFPABI() ? LibCall : Custom); 579 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); 580 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 581 } 582 583 if (Subtarget.is64Bit()) { 584 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, 585 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, 586 MVT::i32, Custom); 587 setOperationAction(ISD::LROUND, MVT::i32, Custom); 588 } 589 590 if (Subtarget.hasStdExtFOrZfinx()) { 591 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, 592 Custom); 593 594 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 595 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 596 XLenVT, Legal); 597 598 if (RV64LegalI32 && Subtarget.is64Bit()) 599 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 600 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 601 MVT::i32, Legal); 602 603 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); 604 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); 605 } 606 607 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, 608 ISD::JumpTable}, 609 XLenVT, Custom); 610 611 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 612 613 if (Subtarget.is64Bit()) 614 setOperationAction(ISD::Constant, MVT::i64, Custom); 615 616 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 617 // Unfortunately this can't be determined just from the ISA naming string. 618 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 619 Subtarget.is64Bit() ? 
Legal : Custom); 620 621 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); 622 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 623 if (Subtarget.is64Bit()) 624 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 625 626 if (Subtarget.hasStdExtZicbop()) { 627 setOperationAction(ISD::PREFETCH, MVT::Other, Legal); 628 } 629 630 if (Subtarget.hasStdExtA()) { 631 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 632 setMinCmpXchgSizeInBits(32); 633 } else if (Subtarget.hasForcedAtomics()) { 634 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 635 } else { 636 setMaxAtomicSizeInBitsSupported(0); 637 } 638 639 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 640 641 setBooleanContents(ZeroOrOneBooleanContent); 642 643 if (Subtarget.hasVInstructions()) { 644 setBooleanVectorContents(ZeroOrOneBooleanContent); 645 646 setOperationAction(ISD::VSCALE, XLenVT, Custom); 647 if (RV64LegalI32 && Subtarget.is64Bit()) 648 setOperationAction(ISD::VSCALE, MVT::i32, Custom); 649 650 // RVV intrinsics may have illegal operands. 651 // We also need to custom legalize vmv.x.s. 652 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, 653 ISD::INTRINSIC_VOID}, 654 {MVT::i8, MVT::i16}, Custom); 655 if (Subtarget.is64Bit()) 656 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 657 MVT::i32, Custom); 658 else 659 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, 660 MVT::i64, Custom); 661 662 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 663 MVT::Other, Custom); 664 665 static const unsigned IntegerVPOps[] = { 666 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, 667 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, 668 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, 669 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, 670 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 671 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, 672 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, 673 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, 674 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, 675 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, 676 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, 677 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; 678 679 static const unsigned FloatingPointVPOps[] = { 680 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 681 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 682 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 683 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, 684 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, 685 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, 686 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, 687 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, 688 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, 689 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, 690 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE, 691 ISD::EXPERIMENTAL_VP_SPLICE}; 692 693 static const unsigned IntegerVecReduceOps[] = { 694 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, 695 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, 696 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; 697 698 static const unsigned FloatingPointVecReduceOps[] = { 699 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, 700 ISD::VECREDUCE_FMAX}; 701 702 if (!Subtarget.is64Bit()) { 703 // We must custom-lower certain vXi64 operations on RV32 due to the vector 704 // element type being 
illegal. 705 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 706 MVT::i64, Custom); 707 708 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); 709 710 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 711 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, 712 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, 713 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, 714 MVT::i64, Custom); 715 } 716 717 for (MVT VT : BoolVecVTs) { 718 if (!isTypeLegal(VT)) 719 continue; 720 721 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 722 723 // Mask VTs are custom-expanded into a series of standard nodes 724 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, 725 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, 726 ISD::SCALAR_TO_VECTOR}, 727 VT, Custom); 728 729 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 730 Custom); 731 732 setOperationAction(ISD::SELECT, VT, Custom); 733 setOperationAction( 734 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, 735 Expand); 736 737 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); 738 739 setOperationAction( 740 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 741 Custom); 742 743 setOperationAction( 744 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 745 Custom); 746 747 // RVV has native int->float & float->int conversions where the 748 // element type sizes are within one power-of-two of each other. Any 749 // wider distances between type sizes have to be lowered as sequences 750 // which progressively narrow the gap in stages. 751 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 752 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 753 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 754 ISD::STRICT_FP_TO_UINT}, 755 VT, Custom); 756 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 757 Custom); 758 759 // Expand all extending loads to types larger than this, and truncating 760 // stores from types larger than this. 761 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 762 setTruncStoreAction(VT, OtherVT, Expand); 763 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 764 OtherVT, Expand); 765 } 766 767 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 768 ISD::VP_TRUNCATE, ISD::VP_SETCC}, 769 VT, Custom); 770 771 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 772 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 773 774 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 775 776 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); 777 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); 778 779 setOperationPromotedToType( 780 ISD::VECTOR_SPLICE, VT, 781 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); 782 } 783 784 for (MVT VT : IntVecVTs) { 785 if (!isTypeLegal(VT)) 786 continue; 787 788 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 789 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 790 791 // Vectors implement MULHS/MULHU. 792 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); 793 794 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. 795 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) 796 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); 797 798 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, 799 Legal); 800 801 // Custom-lower extensions and truncations from/to mask types. 
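      // For example, SIGN_EXTEND from a mask is typically emitted as a select
      // of -1/0 splats under the mask (vmv.v.i + vmerge.vim), and TRUNCATE to
      // a mask masks off the low bit and compares it (vand.vi + vmsne.vi).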
802 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, 803 VT, Custom); 804 805 // RVV has native int->float & float->int conversions where the 806 // element type sizes are within one power-of-two of each other. Any 807 // wider distances between type sizes have to be lowered as sequences 808 // which progressively narrow the gap in stages. 809 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 810 ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, 811 ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, 812 ISD::STRICT_FP_TO_UINT}, 813 VT, Custom); 814 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 815 Custom); 816 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); 817 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, 818 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, 819 VT, Legal); 820 821 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" 822 // nodes which truncate by one power of two at a time. 823 setOperationAction(ISD::TRUNCATE, VT, Custom); 824 825 // Custom-lower insert/extract operations to simplify patterns. 826 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 827 Custom); 828 829 // Custom-lower reduction operations to set up the corresponding custom 830 // nodes' operands. 831 setOperationAction(IntegerVecReduceOps, VT, Custom); 832 833 setOperationAction(IntegerVPOps, VT, Custom); 834 835 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 836 837 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 838 VT, Custom); 839 840 setOperationAction( 841 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 842 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 843 VT, Custom); 844 845 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 846 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 847 VT, Custom); 848 849 setOperationAction(ISD::SELECT, VT, Custom); 850 setOperationAction(ISD::SELECT_CC, VT, Expand); 851 852 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); 853 854 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 855 setTruncStoreAction(VT, OtherVT, Expand); 856 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 857 OtherVT, Expand); 858 } 859 860 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 861 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 862 863 // Splice 864 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); 865 866 if (Subtarget.hasStdExtZvkb()) { 867 setOperationAction(ISD::BSWAP, VT, Legal); 868 setOperationAction(ISD::VP_BSWAP, VT, Custom); 869 } else { 870 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand); 871 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); 872 } 873 874 if (Subtarget.hasStdExtZvbb()) { 875 setOperationAction(ISD::BITREVERSE, VT, Legal); 876 setOperationAction(ISD::VP_BITREVERSE, VT, Custom); 877 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 878 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 879 VT, Custom); 880 } else { 881 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand); 882 setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand); 883 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 884 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 885 VT, Expand); 886 887 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 888 // range of f32. 
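        // The trick: convert the element to f32 and read off the biased
        // exponent. E.g. cttz(x) becomes exponent(x & -x) - 127, and ctlz(x)
        // becomes (EltBits - 1) - (exponent(x) - 127), for non-zero inputs.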
889 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 890 if (isTypeLegal(FloatVT)) { 891 setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 892 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ, 893 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF}, 894 VT, Custom); 895 } 896 } 897 } 898 899 // Expand various CCs to best match the RVV ISA, which natively supports UNE 900 // but no other unordered comparisons, and supports all ordered comparisons 901 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 902 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 903 // and we pattern-match those back to the "original", swapping operands once 904 // more. This way we catch both operations and both "vf" and "fv" forms with 905 // fewer patterns. 906 static const ISD::CondCode VFPCCToExpand[] = { 907 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 908 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 909 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 910 }; 911 912 // TODO: support more ops. 913 static const unsigned ZvfhminPromoteOps[] = { 914 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, 915 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, 916 ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, 917 ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, 918 ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM, 919 ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 920 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; 921 922 // TODO: support more vp ops. 923 static const unsigned ZvfhminPromoteVPOps[] = { 924 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 925 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 926 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 927 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, 928 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, 929 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, 930 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, 931 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, 932 ISD::VP_FMAXIMUM}; 933 934 // Sets common operation actions on RVV floating-point vector types. 935 const auto SetCommonVFPActions = [&](MVT VT) { 936 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 937 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 938 // sizes are within one power-of-two of each other. Therefore conversions 939 // between vXf16 and vXf64 must be lowered as sequences which convert via 940 // vXf32. 941 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 942 // Custom-lower insert/extract operations to simplify patterns. 943 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 944 Custom); 945 // Expand various condition codes (explained above). 946 setCondCodeAction(VFPCCToExpand, VT, Expand); 947 948 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); 949 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); 950 951 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 952 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, 953 ISD::IS_FPCLASS}, 954 VT, Custom); 955 956 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 957 958 // Expand FP operations that need libcalls. 
959 setOperationAction(ISD::FREM, VT, Expand); 960 setOperationAction(ISD::FPOW, VT, Expand); 961 setOperationAction(ISD::FCOS, VT, Expand); 962 setOperationAction(ISD::FSIN, VT, Expand); 963 setOperationAction(ISD::FSINCOS, VT, Expand); 964 setOperationAction(ISD::FEXP, VT, Expand); 965 setOperationAction(ISD::FEXP2, VT, Expand); 966 setOperationAction(ISD::FEXP10, VT, Expand); 967 setOperationAction(ISD::FLOG, VT, Expand); 968 setOperationAction(ISD::FLOG2, VT, Expand); 969 setOperationAction(ISD::FLOG10, VT, Expand); 970 971 setOperationAction(ISD::FCOPYSIGN, VT, Legal); 972 973 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 974 975 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 976 VT, Custom); 977 978 setOperationAction( 979 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 980 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 981 VT, Custom); 982 983 setOperationAction(ISD::SELECT, VT, Custom); 984 setOperationAction(ISD::SELECT_CC, VT, Expand); 985 986 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 987 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 988 VT, Custom); 989 990 setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); 991 setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); 992 993 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); 994 995 setOperationAction(FloatingPointVPOps, VT, Custom); 996 997 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, 998 Custom); 999 setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 1000 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}, 1001 VT, Legal); 1002 setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, 1003 ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, 1004 ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, 1005 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, 1006 VT, Custom); 1007 }; 1008 1009 // Sets common extload/truncstore actions on RVV floating-point vector 1010 // types. 1011 const auto SetCommonVFPExtLoadTruncStoreActions = 1012 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { 1013 for (auto SmallVT : SmallerVTs) { 1014 setTruncStoreAction(VT, SmallVT, Expand); 1015 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); 1016 } 1017 }; 1018 1019 if (Subtarget.hasVInstructionsF16()) { 1020 for (MVT VT : F16VecVTs) { 1021 if (!isTypeLegal(VT)) 1022 continue; 1023 SetCommonVFPActions(VT); 1024 } 1025 } else if (Subtarget.hasVInstructionsF16Minimal()) { 1026 for (MVT VT : F16VecVTs) { 1027 if (!isTypeLegal(VT)) 1028 continue; 1029 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1030 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, 1031 Custom); 1032 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1033 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, 1034 Custom); 1035 setOperationAction(ISD::SELECT_CC, VT, Expand); 1036 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, 1037 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, 1038 VT, Custom); 1039 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 1040 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 1041 VT, Custom); 1042 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1043 // load/store 1044 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1045 1046 // Custom split nxv32f16 since nxv32f32 if not legal. 
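        // (nxv32f32 would need a register group larger than LMUL=8, so
        // nxv32f16 is custom-lowered by splitting into two nxv16f16 halves
        // rather than promoting the whole vector.)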
1047 if (VT == MVT::nxv32f16) { 1048 setOperationAction(ZvfhminPromoteOps, VT, Custom); 1049 setOperationAction(ZvfhminPromoteVPOps, VT, Custom); 1050 continue; 1051 } 1052 // Add more promote ops. 1053 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1054 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1055 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1056 } 1057 } 1058 1059 if (Subtarget.hasVInstructionsF32()) { 1060 for (MVT VT : F32VecVTs) { 1061 if (!isTypeLegal(VT)) 1062 continue; 1063 SetCommonVFPActions(VT); 1064 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1065 } 1066 } 1067 1068 if (Subtarget.hasVInstructionsF64()) { 1069 for (MVT VT : F64VecVTs) { 1070 if (!isTypeLegal(VT)) 1071 continue; 1072 SetCommonVFPActions(VT); 1073 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 1074 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); 1075 } 1076 } 1077 1078 if (Subtarget.useRVVForFixedLengthVectors()) { 1079 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 1080 if (!useRVVForFixedLengthVectorVT(VT)) 1081 continue; 1082 1083 // By default everything must be expanded. 1084 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1085 setOperationAction(Op, VT, Expand); 1086 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { 1087 setTruncStoreAction(VT, OtherVT, Expand); 1088 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, 1089 OtherVT, Expand); 1090 } 1091 1092 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1093 // expansion to a build_vector of 0s. 1094 setOperationAction(ISD::UNDEF, VT, Custom); 1095 1096 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 1097 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 1098 Custom); 1099 1100 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT, 1101 Custom); 1102 1103 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 1104 VT, Custom); 1105 1106 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 1107 1108 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1109 1110 setOperationAction(ISD::SETCC, VT, Custom); 1111 1112 setOperationAction(ISD::SELECT, VT, Custom); 1113 1114 setOperationAction(ISD::TRUNCATE, VT, Custom); 1115 1116 setOperationAction(ISD::BITCAST, VT, Custom); 1117 1118 setOperationAction( 1119 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 1120 Custom); 1121 1122 setOperationAction( 1123 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 1124 Custom); 1125 1126 setOperationAction( 1127 { 1128 ISD::SINT_TO_FP, 1129 ISD::UINT_TO_FP, 1130 ISD::FP_TO_SINT, 1131 ISD::FP_TO_UINT, 1132 ISD::STRICT_SINT_TO_FP, 1133 ISD::STRICT_UINT_TO_FP, 1134 ISD::STRICT_FP_TO_SINT, 1135 ISD::STRICT_FP_TO_UINT, 1136 }, 1137 VT, Custom); 1138 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 1139 Custom); 1140 1141 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 1142 1143 // Operations below are different for between masks and other vectors. 
1144 if (VT.getVectorElementType() == MVT::i1) { 1145 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND, 1146 ISD::OR, ISD::XOR}, 1147 VT, Custom); 1148 1149 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 1150 ISD::VP_SETCC, ISD::VP_TRUNCATE}, 1151 VT, Custom); 1152 1153 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); 1154 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); 1155 continue; 1156 } 1157 1158 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to 1159 // it before type legalization for i64 vectors on RV32. It will then be 1160 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle. 1161 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs 1162 // improvements first. 1163 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { 1164 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 1165 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 1166 } 1167 1168 setOperationAction( 1169 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom); 1170 1171 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 1172 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 1173 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 1174 ISD::VP_SCATTER}, 1175 VT, Custom); 1176 1177 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, 1178 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, 1179 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL}, 1180 VT, Custom); 1181 1182 setOperationAction( 1183 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom); 1184 1185 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. 1186 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) 1187 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); 1188 1189 setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, 1190 ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, 1191 VT, Custom); 1192 1193 setOperationAction(ISD::VSELECT, VT, Custom); 1194 setOperationAction(ISD::SELECT_CC, VT, Expand); 1195 1196 setOperationAction( 1197 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); 1198 1199 // Custom-lower reduction operations to set up the corresponding custom 1200 // nodes' operands. 1201 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, 1202 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, 1203 ISD::VECREDUCE_UMIN}, 1204 VT, Custom); 1205 1206 setOperationAction(IntegerVPOps, VT, Custom); 1207 1208 if (Subtarget.hasStdExtZvkb()) 1209 setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); 1210 1211 if (Subtarget.hasStdExtZvbb()) { 1212 setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, 1213 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, 1214 VT, Custom); 1215 } else { 1216 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 1217 // range of f32. 1218 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1219 if (isTypeLegal(FloatVT)) 1220 setOperationAction( 1221 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, 1222 Custom); 1223 } 1224 } 1225 1226 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 1227 // There are no extending loads or truncating stores. 1228 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { 1229 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 1230 setTruncStoreAction(VT, InnerVT, Expand); 1231 } 1232 1233 if (!useRVVForFixedLengthVectorVT(VT)) 1234 continue; 1235 1236 // By default everything must be expanded. 
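        // The operations this lowering does support are switched back to
        // Custom or Legal further down, overriding this blanket Expand
        // default.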
1237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 1238 setOperationAction(Op, VT, Expand); 1239 1240 // Custom lower fixed vector undefs to scalable vector undefs to avoid 1241 // expansion to a build_vector of 0s. 1242 setOperationAction(ISD::UNDEF, VT, Custom); 1243 1244 if (VT.getVectorElementType() == MVT::f16 && 1245 !Subtarget.hasVInstructionsF16()) { 1246 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1247 setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, 1248 Custom); 1249 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); 1250 setOperationAction( 1251 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, 1252 Custom); 1253 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, 1254 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, 1255 VT, Custom); 1256 setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, 1257 ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, 1258 VT, Custom); 1259 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 1260 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 1261 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 1262 // Don't promote f16 vector operations to f32 if f32 vector type is 1263 // not legal. 1264 // TODO: could split the f16 vector into two vectors and do promotion. 1265 if (!isTypeLegal(F32VecVT)) 1266 continue; 1267 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); 1268 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); 1269 continue; 1270 } 1271 1272 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 1273 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 1274 Custom); 1275 1276 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, 1277 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT, 1278 ISD::EXTRACT_VECTOR_ELT}, 1279 VT, Custom); 1280 1281 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, 1282 ISD::MGATHER, ISD::MSCATTER}, 1283 VT, Custom); 1284 1285 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 1286 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 1287 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 1288 ISD::VP_SCATTER}, 1289 VT, Custom); 1290 1291 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, 1292 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, 1293 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, 1294 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, 1295 VT, Custom); 1296 1297 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 1298 1299 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 1300 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, 1301 VT, Custom); 1302 1303 setCondCodeAction(VFPCCToExpand, VT, Expand); 1304 1305 setOperationAction(ISD::SETCC, VT, Custom); 1306 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); 1307 setOperationAction(ISD::SELECT_CC, VT, Expand); 1308 1309 setOperationAction(ISD::BITCAST, VT, Custom); 1310 1311 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 1312 1313 setOperationAction(FloatingPointVPOps, VT, Custom); 1314 1315 setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, 1316 Custom); 1317 setOperationAction( 1318 {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, 1319 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, 1320 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, 1321 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, 1322 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, 1323 VT, Custom); 1324 } 1325 1326 // 
Custom-legalize bitcasts from fixed-length vectors to scalar types. 1327 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, 1328 Custom); 1329 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 1330 setOperationAction(ISD::BITCAST, MVT::f16, Custom); 1331 if (Subtarget.hasStdExtFOrZfinx()) 1332 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 1333 if (Subtarget.hasStdExtDOrZdinx()) 1334 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 1335 } 1336 } 1337 1338 if (Subtarget.hasStdExtA()) { 1339 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); 1340 if (RV64LegalI32 && Subtarget.is64Bit()) 1341 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 1342 } 1343 1344 if (Subtarget.hasForcedAtomics()) { 1345 // Force __sync libcalls to be emitted for atomic rmw/cas operations. 1346 setOperationAction( 1347 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, 1348 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, 1349 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, 1350 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, 1351 XLenVT, LibCall); 1352 } 1353 1354 if (Subtarget.hasVendorXTHeadMemIdx()) { 1355 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { 1356 setIndexedLoadAction(im, MVT::i8, Legal); 1357 setIndexedStoreAction(im, MVT::i8, Legal); 1358 setIndexedLoadAction(im, MVT::i16, Legal); 1359 setIndexedStoreAction(im, MVT::i16, Legal); 1360 setIndexedLoadAction(im, MVT::i32, Legal); 1361 setIndexedStoreAction(im, MVT::i32, Legal); 1362 1363 if (Subtarget.is64Bit()) { 1364 setIndexedLoadAction(im, MVT::i64, Legal); 1365 setIndexedStoreAction(im, MVT::i64, Legal); 1366 } 1367 } 1368 } 1369 1370 // Function alignments. 1371 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); 1372 setMinFunctionAlignment(FunctionAlignment); 1373 // Set preferred alignments. 
1374 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); 1375 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); 1376 1377 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, 1378 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, 1379 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); 1380 if (Subtarget.is64Bit()) 1381 setTargetDAGCombine(ISD::SRA); 1382 1383 if (Subtarget.hasStdExtFOrZfinx()) 1384 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); 1385 1386 if (Subtarget.hasStdExtZbb()) 1387 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); 1388 1389 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) 1390 setTargetDAGCombine(ISD::TRUNCATE); 1391 1392 if (Subtarget.hasStdExtZbkb()) 1393 setTargetDAGCombine(ISD::BITREVERSE); 1394 if (Subtarget.hasStdExtZfhminOrZhinxmin()) 1395 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); 1396 if (Subtarget.hasStdExtFOrZfinx()) 1397 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, 1398 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); 1399 if (Subtarget.hasVInstructions()) 1400 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, 1401 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, 1402 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, 1403 ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, 1404 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, 1405 ISD::INSERT_VECTOR_ELT}); 1406 if (Subtarget.hasVendorXTHeadMemPair()) 1407 setTargetDAGCombine({ISD::LOAD, ISD::STORE}); 1408 if (Subtarget.useRVVForFixedLengthVectors()) 1409 setTargetDAGCombine(ISD::BITCAST); 1410 1411 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); 1412 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); 1413 1414 // Disable strict node mutation. 1415 IsStrictFPEnabled = true; 1416} 1417 1418EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 1419 LLVMContext &Context, 1420 EVT VT) const { 1421 if (!VT.isVector()) 1422 return getPointerTy(DL); 1423 if (Subtarget.hasVInstructions() && 1424 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) 1425 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 1426 return VT.changeVectorElementTypeToInteger(); 1427} 1428 1429MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { 1430 return Subtarget.getXLenVT(); 1431} 1432 1433// Return false if we can lower get_vector_length to a vsetvli intrinsic. 1434bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, 1435 unsigned VF, 1436 bool IsScalable) const { 1437 if (!Subtarget.hasVInstructions()) 1438 return true; 1439 1440 if (!IsScalable) 1441 return true; 1442 1443 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT()) 1444 return true; 1445 1446 // Don't allow VF=1 if those types are't legal. 1447 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) 1448 return true; 1449 1450 // VLEN=32 support is incomplete. 1451 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 1452 return true; 1453 1454 // The maximum VF is for the smallest element width with LMUL=8. 1455 // VF must be a power of 2. 1456 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; 1457 return VF > MaxVF || !isPowerOf2_32(VF); 1458} 1459 1460bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1461 const CallInst &I, 1462 MachineFunction &MF, 1463 unsigned Intrinsic) const { 1464 auto &DL = I.getModule()->getDataLayout(); 1465 1466 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, 1467 bool IsUnitStrided) { 1468 Info.opc = IsStore ? 
ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; 1469 Info.ptrVal = I.getArgOperand(PtrOp); 1470 Type *MemTy; 1471 if (IsStore) { 1472 // Store value is the first operand. 1473 MemTy = I.getArgOperand(0)->getType(); 1474 } else { 1475 // Use return type. If it's segment load, return type is a struct. 1476 MemTy = I.getType(); 1477 if (MemTy->isStructTy()) 1478 MemTy = MemTy->getStructElementType(0); 1479 } 1480 if (!IsUnitStrided) 1481 MemTy = MemTy->getScalarType(); 1482 1483 Info.memVT = getValueType(DL, MemTy); 1484 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8); 1485 Info.size = MemoryLocation::UnknownSize; 1486 Info.flags |= 1487 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; 1488 return true; 1489 }; 1490 1491 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) 1492 Info.flags |= MachineMemOperand::MONonTemporal; 1493 1494 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); 1495 switch (Intrinsic) { 1496 default: 1497 return false; 1498 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 1499 case Intrinsic::riscv_masked_atomicrmw_add_i32: 1500 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 1501 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 1502 case Intrinsic::riscv_masked_atomicrmw_max_i32: 1503 case Intrinsic::riscv_masked_atomicrmw_min_i32: 1504 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 1505 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 1506 case Intrinsic::riscv_masked_cmpxchg_i32: 1507 Info.opc = ISD::INTRINSIC_W_CHAIN; 1508 Info.memVT = MVT::i32; 1509 Info.ptrVal = I.getArgOperand(0); 1510 Info.offset = 0; 1511 Info.align = Align(4); 1512 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 1513 MachineMemOperand::MOVolatile; 1514 return true; 1515 case Intrinsic::riscv_masked_strided_load: 1516 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, 1517 /*IsUnitStrided*/ false); 1518 case Intrinsic::riscv_masked_strided_store: 1519 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, 1520 /*IsUnitStrided*/ false); 1521 case Intrinsic::riscv_seg2_load: 1522 case Intrinsic::riscv_seg3_load: 1523 case Intrinsic::riscv_seg4_load: 1524 case Intrinsic::riscv_seg5_load: 1525 case Intrinsic::riscv_seg6_load: 1526 case Intrinsic::riscv_seg7_load: 1527 case Intrinsic::riscv_seg8_load: 1528 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, 1529 /*IsUnitStrided*/ false); 1530 case Intrinsic::riscv_seg2_store: 1531 case Intrinsic::riscv_seg3_store: 1532 case Intrinsic::riscv_seg4_store: 1533 case Intrinsic::riscv_seg5_store: 1534 case Intrinsic::riscv_seg6_store: 1535 case Intrinsic::riscv_seg7_store: 1536 case Intrinsic::riscv_seg8_store: 1537 // Operands are (vec, ..., vec, ptr, vl) 1538 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1539 /*IsStore*/ true, 1540 /*IsUnitStrided*/ false); 1541 case Intrinsic::riscv_vle: 1542 case Intrinsic::riscv_vle_mask: 1543 case Intrinsic::riscv_vleff: 1544 case Intrinsic::riscv_vleff_mask: 1545 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1546 /*IsStore*/ false, 1547 /*IsUnitStrided*/ true); 1548 case Intrinsic::riscv_vse: 1549 case Intrinsic::riscv_vse_mask: 1550 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1551 /*IsStore*/ true, 1552 /*IsUnitStrided*/ true); 1553 case Intrinsic::riscv_vlse: 1554 case Intrinsic::riscv_vlse_mask: 1555 case Intrinsic::riscv_vloxei: 1556 case Intrinsic::riscv_vloxei_mask: 1557 case Intrinsic::riscv_vluxei: 1558 case Intrinsic::riscv_vluxei_mask: 1559 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1560 /*IsStore*/ false, 1561 
/*IsUnitStrided*/ false); 1562 case Intrinsic::riscv_vsse: 1563 case Intrinsic::riscv_vsse_mask: 1564 case Intrinsic::riscv_vsoxei: 1565 case Intrinsic::riscv_vsoxei_mask: 1566 case Intrinsic::riscv_vsuxei: 1567 case Intrinsic::riscv_vsuxei_mask: 1568 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, 1569 /*IsStore*/ true, 1570 /*IsUnitStrided*/ false); 1571 case Intrinsic::riscv_vlseg2: 1572 case Intrinsic::riscv_vlseg3: 1573 case Intrinsic::riscv_vlseg4: 1574 case Intrinsic::riscv_vlseg5: 1575 case Intrinsic::riscv_vlseg6: 1576 case Intrinsic::riscv_vlseg7: 1577 case Intrinsic::riscv_vlseg8: 1578 case Intrinsic::riscv_vlseg2ff: 1579 case Intrinsic::riscv_vlseg3ff: 1580 case Intrinsic::riscv_vlseg4ff: 1581 case Intrinsic::riscv_vlseg5ff: 1582 case Intrinsic::riscv_vlseg6ff: 1583 case Intrinsic::riscv_vlseg7ff: 1584 case Intrinsic::riscv_vlseg8ff: 1585 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1586 /*IsStore*/ false, 1587 /*IsUnitStrided*/ false); 1588 case Intrinsic::riscv_vlseg2_mask: 1589 case Intrinsic::riscv_vlseg3_mask: 1590 case Intrinsic::riscv_vlseg4_mask: 1591 case Intrinsic::riscv_vlseg5_mask: 1592 case Intrinsic::riscv_vlseg6_mask: 1593 case Intrinsic::riscv_vlseg7_mask: 1594 case Intrinsic::riscv_vlseg8_mask: 1595 case Intrinsic::riscv_vlseg2ff_mask: 1596 case Intrinsic::riscv_vlseg3ff_mask: 1597 case Intrinsic::riscv_vlseg4ff_mask: 1598 case Intrinsic::riscv_vlseg5ff_mask: 1599 case Intrinsic::riscv_vlseg6ff_mask: 1600 case Intrinsic::riscv_vlseg7ff_mask: 1601 case Intrinsic::riscv_vlseg8ff_mask: 1602 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1603 /*IsStore*/ false, 1604 /*IsUnitStrided*/ false); 1605 case Intrinsic::riscv_vlsseg2: 1606 case Intrinsic::riscv_vlsseg3: 1607 case Intrinsic::riscv_vlsseg4: 1608 case Intrinsic::riscv_vlsseg5: 1609 case Intrinsic::riscv_vlsseg6: 1610 case Intrinsic::riscv_vlsseg7: 1611 case Intrinsic::riscv_vlsseg8: 1612 case Intrinsic::riscv_vloxseg2: 1613 case Intrinsic::riscv_vloxseg3: 1614 case Intrinsic::riscv_vloxseg4: 1615 case Intrinsic::riscv_vloxseg5: 1616 case Intrinsic::riscv_vloxseg6: 1617 case Intrinsic::riscv_vloxseg7: 1618 case Intrinsic::riscv_vloxseg8: 1619 case Intrinsic::riscv_vluxseg2: 1620 case Intrinsic::riscv_vluxseg3: 1621 case Intrinsic::riscv_vluxseg4: 1622 case Intrinsic::riscv_vluxseg5: 1623 case Intrinsic::riscv_vluxseg6: 1624 case Intrinsic::riscv_vluxseg7: 1625 case Intrinsic::riscv_vluxseg8: 1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1627 /*IsStore*/ false, 1628 /*IsUnitStrided*/ false); 1629 case Intrinsic::riscv_vlsseg2_mask: 1630 case Intrinsic::riscv_vlsseg3_mask: 1631 case Intrinsic::riscv_vlsseg4_mask: 1632 case Intrinsic::riscv_vlsseg5_mask: 1633 case Intrinsic::riscv_vlsseg6_mask: 1634 case Intrinsic::riscv_vlsseg7_mask: 1635 case Intrinsic::riscv_vlsseg8_mask: 1636 case Intrinsic::riscv_vloxseg2_mask: 1637 case Intrinsic::riscv_vloxseg3_mask: 1638 case Intrinsic::riscv_vloxseg4_mask: 1639 case Intrinsic::riscv_vloxseg5_mask: 1640 case Intrinsic::riscv_vloxseg6_mask: 1641 case Intrinsic::riscv_vloxseg7_mask: 1642 case Intrinsic::riscv_vloxseg8_mask: 1643 case Intrinsic::riscv_vluxseg2_mask: 1644 case Intrinsic::riscv_vluxseg3_mask: 1645 case Intrinsic::riscv_vluxseg4_mask: 1646 case Intrinsic::riscv_vluxseg5_mask: 1647 case Intrinsic::riscv_vluxseg6_mask: 1648 case Intrinsic::riscv_vluxseg7_mask: 1649 case Intrinsic::riscv_vluxseg8_mask: 1650 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, 1651 /*IsStore*/ false, 1652 /*IsUnitStrided*/ false); 1653 case 
Intrinsic::riscv_vsseg2: 1654 case Intrinsic::riscv_vsseg3: 1655 case Intrinsic::riscv_vsseg4: 1656 case Intrinsic::riscv_vsseg5: 1657 case Intrinsic::riscv_vsseg6: 1658 case Intrinsic::riscv_vsseg7: 1659 case Intrinsic::riscv_vsseg8: 1660 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, 1661 /*IsStore*/ true, 1662 /*IsUnitStrided*/ false); 1663 case Intrinsic::riscv_vsseg2_mask: 1664 case Intrinsic::riscv_vsseg3_mask: 1665 case Intrinsic::riscv_vsseg4_mask: 1666 case Intrinsic::riscv_vsseg5_mask: 1667 case Intrinsic::riscv_vsseg6_mask: 1668 case Intrinsic::riscv_vsseg7_mask: 1669 case Intrinsic::riscv_vsseg8_mask: 1670 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1671 /*IsStore*/ true, 1672 /*IsUnitStrided*/ false); 1673 case Intrinsic::riscv_vssseg2: 1674 case Intrinsic::riscv_vssseg3: 1675 case Intrinsic::riscv_vssseg4: 1676 case Intrinsic::riscv_vssseg5: 1677 case Intrinsic::riscv_vssseg6: 1678 case Intrinsic::riscv_vssseg7: 1679 case Intrinsic::riscv_vssseg8: 1680 case Intrinsic::riscv_vsoxseg2: 1681 case Intrinsic::riscv_vsoxseg3: 1682 case Intrinsic::riscv_vsoxseg4: 1683 case Intrinsic::riscv_vsoxseg5: 1684 case Intrinsic::riscv_vsoxseg6: 1685 case Intrinsic::riscv_vsoxseg7: 1686 case Intrinsic::riscv_vsoxseg8: 1687 case Intrinsic::riscv_vsuxseg2: 1688 case Intrinsic::riscv_vsuxseg3: 1689 case Intrinsic::riscv_vsuxseg4: 1690 case Intrinsic::riscv_vsuxseg5: 1691 case Intrinsic::riscv_vsuxseg6: 1692 case Intrinsic::riscv_vsuxseg7: 1693 case Intrinsic::riscv_vsuxseg8: 1694 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, 1695 /*IsStore*/ true, 1696 /*IsUnitStrided*/ false); 1697 case Intrinsic::riscv_vssseg2_mask: 1698 case Intrinsic::riscv_vssseg3_mask: 1699 case Intrinsic::riscv_vssseg4_mask: 1700 case Intrinsic::riscv_vssseg5_mask: 1701 case Intrinsic::riscv_vssseg6_mask: 1702 case Intrinsic::riscv_vssseg7_mask: 1703 case Intrinsic::riscv_vssseg8_mask: 1704 case Intrinsic::riscv_vsoxseg2_mask: 1705 case Intrinsic::riscv_vsoxseg3_mask: 1706 case Intrinsic::riscv_vsoxseg4_mask: 1707 case Intrinsic::riscv_vsoxseg5_mask: 1708 case Intrinsic::riscv_vsoxseg6_mask: 1709 case Intrinsic::riscv_vsoxseg7_mask: 1710 case Intrinsic::riscv_vsoxseg8_mask: 1711 case Intrinsic::riscv_vsuxseg2_mask: 1712 case Intrinsic::riscv_vsuxseg3_mask: 1713 case Intrinsic::riscv_vsuxseg4_mask: 1714 case Intrinsic::riscv_vsuxseg5_mask: 1715 case Intrinsic::riscv_vsuxseg6_mask: 1716 case Intrinsic::riscv_vsuxseg7_mask: 1717 case Intrinsic::riscv_vsuxseg8_mask: 1718 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, 1719 /*IsStore*/ true, 1720 /*IsUnitStrided*/ false); 1721 } 1722} 1723 1724bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1725 const AddrMode &AM, Type *Ty, 1726 unsigned AS, 1727 Instruction *I) const { 1728 // No global is ever allowed as a base. 1729 if (AM.BaseGV) 1730 return false; 1731 1732 // RVV instructions only support register addressing. 1733 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty)) 1734 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; 1735 1736 // Require a 12-bit signed offset. 1737 if (!isInt<12>(AM.BaseOffs)) 1738 return false; 1739 1740 switch (AM.Scale) { 1741 case 0: // "r+i" or just "i", depending on HasBaseReg. 1742 break; 1743 case 1: 1744 if (!AM.HasBaseReg) // allow "r+i". 1745 break; 1746 return false; // disallow "r+r" or "r+r+i". 
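  // In the base ISA, loads and stores only provide a register+imm12
  // addressing mode (e.g. lw a0, 16(a1)); there is no scaled or
  // register+register form, so any other AddrMode shape is rejected here.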
1747 default: 1748 return false; 1749 } 1750 1751 return true; 1752} 1753 1754bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1755 return isInt<12>(Imm); 1756} 1757 1758bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1759 return isInt<12>(Imm); 1760} 1761 1762// On RV32, 64-bit integers are split into their high and low parts and held 1763// in two different registers, so the trunc is free since the low register can 1764// just be used. 1765// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of 1766// isTruncateFree? 1767bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1768 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1769 return false; 1770 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1771 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1772 return (SrcBits == 64 && DestBits == 32); 1773} 1774 1775bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1776 // We consider i64->i32 free on RV64 since we have good selection of W 1777 // instructions that make promoting operations back to i64 free in many cases. 1778 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || 1779 !DstVT.isInteger()) 1780 return false; 1781 unsigned SrcBits = SrcVT.getSizeInBits(); 1782 unsigned DestBits = DstVT.getSizeInBits(); 1783 return (SrcBits == 64 && DestBits == 32); 1784} 1785 1786bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1787 // Zexts are free if they can be combined with a load. 1788 // Don't advertise i32->i64 zextload as being free for RV64. It interacts 1789 // poorly with type legalization of compares preferring sext. 1790 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 1791 EVT MemVT = LD->getMemoryVT(); 1792 if ((MemVT == MVT::i8 || MemVT == MVT::i16) && 1793 (LD->getExtensionType() == ISD::NON_EXTLOAD || 1794 LD->getExtensionType() == ISD::ZEXTLOAD)) 1795 return true; 1796 } 1797 1798 return TargetLowering::isZExtFree(Val, VT2); 1799} 1800 1801bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 1802 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 1803} 1804 1805bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { 1806 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32); 1807} 1808 1809bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { 1810 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); 1811} 1812 1813bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { 1814 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || 1815 Subtarget.hasVendorXCVbitmanip(); 1816} 1817 1818bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( 1819 const Instruction &AndI) const { 1820 // We expect to be able to match a bit extraction instruction if the Zbs 1821 // extension is supported and the mask is a power of two. However, we 1822 // conservatively return false if the mask would fit in an ANDI instruction, 1823 // on the basis that it's possible the sinking+duplication of the AND in 1824 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction 1825 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). 
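  // For example, (X & 0x400) != 0 can already be matched as ANDI+BNEZ (the
  // mask fits a signed 12-bit immediate), so we return false; for
  // (X & 0x10000) != 0 the mask is a power of two that does not fit ANDI, so
  // we return true and let a bit-extract instruction handle it.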
1826 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) 1827 return false; 1828 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); 1829 if (!Mask) 1830 return false; 1831 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2(); 1832} 1833 1834bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { 1835 EVT VT = Y.getValueType(); 1836 1837 // FIXME: Support vectors once we have tests. 1838 if (VT.isVector()) 1839 return false; 1840 1841 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 1842 !isa<ConstantSDNode>(Y); 1843} 1844 1845bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { 1846 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. 1847 if (Subtarget.hasStdExtZbs()) 1848 return X.getValueType().isScalarInteger(); 1849 auto *C = dyn_cast<ConstantSDNode>(Y); 1850 // XTheadBs provides th.tst (similar to bexti), if Y is a constant. 1851 if (Subtarget.hasVendorXTHeadBs()) 1852 return C != nullptr; 1853 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position. 1854 return C && C->getAPIntValue().ule(10); 1855} 1856 1857bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, 1858 EVT VT) const { 1859 // Only enable for rvv. 1860 if (!VT.isVector() || !Subtarget.hasVInstructions()) 1861 return false; 1862 1863 if (VT.isFixedLengthVector() && !isTypeLegal(VT)) 1864 return false; 1865 1866 return true; 1867} 1868 1869bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 1870 Type *Ty) const { 1871 assert(Ty->isIntegerTy()); 1872 1873 unsigned BitSize = Ty->getIntegerBitWidth(); 1874 if (BitSize > Subtarget.getXLen()) 1875 return false; 1876 1877 // Fast path, assume 32-bit immediates are cheap. 1878 int64_t Val = Imm.getSExtValue(); 1879 if (isInt<32>(Val)) 1880 return true; 1881 1882 // A constant pool entry may be more aligned than the load we're trying to 1883 // replace. If we don't support unaligned scalar mem, prefer the constant 1884 // pool. 1885 // TODO: Can the caller pass down the alignment? 1886 if (!Subtarget.hasFastUnalignedAccess() && 1887 !Subtarget.enableUnalignedScalarMem()) 1888 return true; 1889 1890 // Prefer to keep the load if it would require many instructions. 1891 // This uses the same threshold we use for constant pools but doesn't 1892 // check useConstantPoolForLargeInts. 1893 // TODO: Should we keep the load only when we're definitely going to emit a 1894 // constant pool? 1895 1896 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); 1897 return Seq.size() <= Subtarget.getMaxBuildIntsCost(); 1898} 1899 1900bool RISCVTargetLowering:: 1901 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 1902 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 1903 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 1904 SelectionDAG &DAG) const { 1905 // One interesting pattern that we'd want to form is 'bit extract': 1906 // ((1 >> Y) & 1) ==/!= 0 1907 // But we also need to be careful not to try to reverse that fold. 1908 1909 // Is this '((1 >> Y) & 1)'? 1910 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) 1911 return false; // Keep the 'bit extract' pattern. 1912 1913 // Will this be '((1 >> Y) & 1)' after the transform? 1914 if (NewShiftOpcode == ISD::SRL && CC->isOne()) 1915 return true; // Do form the 'bit extract' pattern. 1916 1917 // If 'X' is a constant, and we transform, then we will immediately 1918 // try to undo the fold, thus causing endless combine loop.
1919 // So only do the transform if X is not a constant. This matches the default 1920 // implementation of this function. 1921 return !XC; 1922} 1923 1924bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { 1925 switch (Opcode) { 1926 case Instruction::Add: 1927 case Instruction::Sub: 1928 case Instruction::Mul: 1929 case Instruction::And: 1930 case Instruction::Or: 1931 case Instruction::Xor: 1932 case Instruction::FAdd: 1933 case Instruction::FSub: 1934 case Instruction::FMul: 1935 case Instruction::FDiv: 1936 case Instruction::ICmp: 1937 case Instruction::FCmp: 1938 return true; 1939 case Instruction::Shl: 1940 case Instruction::LShr: 1941 case Instruction::AShr: 1942 case Instruction::UDiv: 1943 case Instruction::SDiv: 1944 case Instruction::URem: 1945 case Instruction::SRem: 1946 return Operand == 1; 1947 default: 1948 return false; 1949 } 1950} 1951 1952 1953bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { 1954 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1955 return false; 1956 1957 if (canSplatOperand(I->getOpcode(), Operand)) 1958 return true; 1959 1960 auto *II = dyn_cast<IntrinsicInst>(I); 1961 if (!II) 1962 return false; 1963 1964 switch (II->getIntrinsicID()) { 1965 case Intrinsic::fma: 1966 case Intrinsic::vp_fma: 1967 return Operand == 0 || Operand == 1; 1968 case Intrinsic::vp_shl: 1969 case Intrinsic::vp_lshr: 1970 case Intrinsic::vp_ashr: 1971 case Intrinsic::vp_udiv: 1972 case Intrinsic::vp_sdiv: 1973 case Intrinsic::vp_urem: 1974 case Intrinsic::vp_srem: 1975 return Operand == 1; 1976 // These intrinsics are commutative. 1977 case Intrinsic::vp_add: 1978 case Intrinsic::vp_mul: 1979 case Intrinsic::vp_and: 1980 case Intrinsic::vp_or: 1981 case Intrinsic::vp_xor: 1982 case Intrinsic::vp_fadd: 1983 case Intrinsic::vp_fmul: 1984 case Intrinsic::vp_icmp: 1985 case Intrinsic::vp_fcmp: 1986 // These intrinsics have 'vr' versions. 1987 case Intrinsic::vp_sub: 1988 case Intrinsic::vp_fsub: 1989 case Intrinsic::vp_fdiv: 1990 return Operand == 0 || Operand == 1; 1991 default: 1992 return false; 1993 } 1994} 1995 1996/// Check if sinking \p I's operands to I's basic block is profitable, because 1997/// the operands can be folded into a target instruction, e.g. 1998/// splats of scalars can fold into vector instructions. 1999bool RISCVTargetLowering::shouldSinkOperands( 2000 Instruction *I, SmallVectorImpl<Use *> &Ops) const { 2001 using namespace llvm::PatternMatch; 2002 2003 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 2004 return false; 2005 2006 for (auto OpIdx : enumerate(I->operands())) { 2007 if (!canSplatOperand(I, OpIdx.index())) 2008 continue; 2009 2010 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); 2011 // Make sure we are not already sinking this operand 2012 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) 2013 continue; 2014 2015 // We are looking for a splat that can be sunk. 2016 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 2017 m_Undef(), m_ZeroMask()))) 2018 continue; 2019 2020 // Don't sink i1 splats. 
2021 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1)) 2022 continue; 2023 2024 // All uses of the shuffle should be sunk to avoid duplicating it across GPR 2025 // and vector registers. 2026 for (Use &U : Op->uses()) { 2027 Instruction *Insn = cast<Instruction>(U.getUser()); 2028 if (!canSplatOperand(Insn, U.getOperandNo())) 2029 return false; 2030 } 2031 2032 Ops.push_back(&Op->getOperandUse(0)); 2033 Ops.push_back(&OpIdx.value()); 2034 } 2035 return true; 2036} 2037 2038bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { 2039 unsigned Opc = VecOp.getOpcode(); 2040 2041 // Assume target opcodes can't be scalarized. 2042 // TODO - do we have any exceptions? 2043 if (Opc >= ISD::BUILTIN_OP_END) 2044 return false; 2045 2046 // If the vector op is not supported, try to convert to scalar. 2047 EVT VecVT = VecOp.getValueType(); 2048 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) 2049 return true; 2050 2051 // If the vector op is supported, but the scalar op is not, the transform may 2052 // not be worthwhile. 2053 // Permit a vector binary operation to be converted to a scalar binary 2054 // operation which is custom lowered with an illegal type. 2055 EVT ScalarVT = VecVT.getScalarType(); 2056 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) || 2057 isOperationCustom(Opc, ScalarVT); 2058} 2059 2060bool RISCVTargetLowering::isOffsetFoldingLegal( 2061 const GlobalAddressSDNode *GA) const { 2062 // In order to maximise the opportunity for common subexpression elimination, 2063 // keep a separate ADD node for the global address offset instead of folding 2064 // it in the global address node. Later peephole optimisations may choose to 2065 // fold it back in when profitable. 2066 return false; 2067} 2068 2069// Return one of the following: 2070// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value. 2071// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its 2072// positive counterpart, which will be materialized from the first returned 2073// element. The second returned element indicates that an FNEG should 2074// follow. 2075// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. 2076std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, 2077 EVT VT) const { 2078 if (!Subtarget.hasStdExtZfa()) 2079 return std::make_pair(-1, false); 2080 2081 bool IsSupportedVT = false; 2082 if (VT == MVT::f16) { 2083 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); 2084 } else if (VT == MVT::f32) { 2085 IsSupportedVT = true; 2086 } else if (VT == MVT::f64) { 2087 assert(Subtarget.hasStdExtD() && "Expect D extension"); 2088 IsSupportedVT = true; 2089 } 2090 2091 if (!IsSupportedVT) 2092 return std::make_pair(-1, false); 2093 2094 int Index = RISCVLoadFPImm::getLoadFPImm(Imm); 2095 if (Index < 0 && Imm.isNegative()) 2096 // Try the combination of its positive counterpart + FNEG.
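  // For example, Imm == -2.0 has no direct FLI encoding, but +2.0 does, so we
  // return the FLI index of +2.0 together with the FNEG flag.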
2097 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true); 2098 else 2099 return std::make_pair(Index, false); 2100} 2101 2102bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 2103 bool ForCodeSize) const { 2104 bool IsLegalVT = false; 2105 if (VT == MVT::f16) 2106 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); 2107 else if (VT == MVT::f32) 2108 IsLegalVT = Subtarget.hasStdExtFOrZfinx(); 2109 else if (VT == MVT::f64) 2110 IsLegalVT = Subtarget.hasStdExtDOrZdinx(); 2111 else if (VT == MVT::bf16) 2112 IsLegalVT = Subtarget.hasStdExtZfbfmin(); 2113 2114 if (!IsLegalVT) 2115 return false; 2116 2117 if (getLegalZfaFPImm(Imm, VT).first >= 0) 2118 return true; 2119 2120 // Cannot create a 64-bit floating-point immediate value for rv32. 2121 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { 2122 // td can handle +0.0 or -0.0 already. 2123 // -0.0 can be created by fmv + fneg. 2124 return Imm.isZero(); 2125 } 2126 2127 // Special case: fmv + fneg 2128 if (Imm.isNegZero()) 2129 return true; 2130 2131 // Building an integer and then converting requires a fmv at the end of 2132 // the integer sequence. 2133 const int Cost = 2134 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), 2135 Subtarget); 2136 return Cost <= FPImmCost; 2137} 2138 2139// TODO: This is very conservative. 2140bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 2141 unsigned Index) const { 2142 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) 2143 return false; 2144 2145 // Only support extracting a fixed from a fixed vector for now. 2146 if (ResVT.isScalableVector() || SrcVT.isScalableVector()) 2147 return false; 2148 2149 unsigned ResElts = ResVT.getVectorNumElements(); 2150 unsigned SrcElts = SrcVT.getVectorNumElements(); 2151 2152 // Conservatively only handle extracting half of a vector. 2153 // TODO: Relax this. 2154 if ((ResElts * 2) != SrcElts) 2155 return false; 2156 2157 // The smallest type we can slide is i8. 2158 // TODO: We can extract index 0 from a mask vector without a slide. 2159 if (ResVT.getVectorElementType() == MVT::i1) 2160 return false; 2161 2162 // Slide can support arbitrary index, but we only treat vslidedown.vi as 2163 // cheap. 2164 if (Index >= 32) 2165 return false; 2166 2167 // TODO: We can do arbitrary slidedowns, but for now only support extracting 2168 // the upper half of a vector until we have more test coverage. 2169 return Index == 0 || Index == ResElts; 2170} 2171 2172MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 2173 CallingConv::ID CC, 2174 EVT VT) const { 2175 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2176 // We might still end up using a GPR but that will be decided based on ABI. 2177 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2178 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2179 return MVT::f32; 2180 2181 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); 2182 2183 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) 2184 return MVT::i64; 2185 2186 return PartVT; 2187} 2188 2189unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 2190 CallingConv::ID CC, 2191 EVT VT) const { 2192 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 2193 // We might still end up using a GPR but that will be decided based on ABI.
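  // For example, on an F-only target without Zfhmin an f16 argument is
  // promoted to f32 for passing purposes and still occupies a single
  // register.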
2194 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && 2195 !Subtarget.hasStdExtZfhminOrZhinxmin()) 2196 return 1; 2197 2198 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); 2199} 2200 2201unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( 2202 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 2203 unsigned &NumIntermediates, MVT &RegisterVT) const { 2204 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( 2205 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); 2206 2207 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) 2208 IntermediateVT = MVT::i64; 2209 2210 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) 2211 RegisterVT = MVT::i64; 2212 2213 return NumRegs; 2214} 2215 2216// Changes the condition code and swaps operands if necessary, so the SetCC 2217// operation matches one of the comparisons supported directly by branches 2218// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare 2219// with 1/-1. 2220static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, 2221 ISD::CondCode &CC, SelectionDAG &DAG) { 2222 // If this is a single bit test that can't be handled by ANDI, shift the 2223 // bit to be tested to the MSB and perform a signed compare with 0. 2224 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && 2225 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && 2226 isa<ConstantSDNode>(LHS.getOperand(1))) { 2227 uint64_t Mask = LHS.getConstantOperandVal(1); 2228 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) { 2229 unsigned ShAmt = 0; 2230 if (isPowerOf2_64(Mask)) { 2231 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 2232 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); 2233 } else { 2234 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); 2235 } 2236 2237 LHS = LHS.getOperand(0); 2238 if (ShAmt != 0) 2239 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, 2240 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 2241 return; 2242 } 2243 } 2244 2245 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 2246 int64_t C = RHSC->getSExtValue(); 2247 switch (CC) { 2248 default: break; 2249 case ISD::SETGT: 2250 // Convert X > -1 to X >= 0. 2251 if (C == -1) { 2252 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 2253 CC = ISD::SETGE; 2254 return; 2255 } 2256 break; 2257 case ISD::SETLT: 2258 // Convert X < 1 to 0 >= X. 
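  // Using 0 as the constant lets the branch read the zero register directly
  // instead of materializing the value 1 in a GPR.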
2259 if (C == 1) { 2260 RHS = LHS; 2261 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 2262 CC = ISD::SETGE; 2263 return; 2264 } 2265 break; 2266 } 2267 } 2268 2269 switch (CC) { 2270 default: 2271 break; 2272 case ISD::SETGT: 2273 case ISD::SETLE: 2274 case ISD::SETUGT: 2275 case ISD::SETULE: 2276 CC = ISD::getSetCCSwappedOperands(CC); 2277 std::swap(LHS, RHS); 2278 break; 2279 } 2280} 2281 2282RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 2283 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 2284 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 2285 if (VT.getVectorElementType() == MVT::i1) 2286 KnownSize *= 8; 2287 2288 switch (KnownSize) { 2289 default: 2290 llvm_unreachable("Invalid LMUL."); 2291 case 8: 2292 return RISCVII::VLMUL::LMUL_F8; 2293 case 16: 2294 return RISCVII::VLMUL::LMUL_F4; 2295 case 32: 2296 return RISCVII::VLMUL::LMUL_F2; 2297 case 64: 2298 return RISCVII::VLMUL::LMUL_1; 2299 case 128: 2300 return RISCVII::VLMUL::LMUL_2; 2301 case 256: 2302 return RISCVII::VLMUL::LMUL_4; 2303 case 512: 2304 return RISCVII::VLMUL::LMUL_8; 2305 } 2306} 2307 2308unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 2309 switch (LMul) { 2310 default: 2311 llvm_unreachable("Invalid LMUL."); 2312 case RISCVII::VLMUL::LMUL_F8: 2313 case RISCVII::VLMUL::LMUL_F4: 2314 case RISCVII::VLMUL::LMUL_F2: 2315 case RISCVII::VLMUL::LMUL_1: 2316 return RISCV::VRRegClassID; 2317 case RISCVII::VLMUL::LMUL_2: 2318 return RISCV::VRM2RegClassID; 2319 case RISCVII::VLMUL::LMUL_4: 2320 return RISCV::VRM4RegClassID; 2321 case RISCVII::VLMUL::LMUL_8: 2322 return RISCV::VRM8RegClassID; 2323 } 2324} 2325 2326unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 2327 RISCVII::VLMUL LMUL = getLMUL(VT); 2328 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 2329 LMUL == RISCVII::VLMUL::LMUL_F4 || 2330 LMUL == RISCVII::VLMUL::LMUL_F2 || 2331 LMUL == RISCVII::VLMUL::LMUL_1) { 2332 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 2333 "Unexpected subreg numbering"); 2334 return RISCV::sub_vrm1_0 + Index; 2335 } 2336 if (LMUL == RISCVII::VLMUL::LMUL_2) { 2337 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 2338 "Unexpected subreg numbering"); 2339 return RISCV::sub_vrm2_0 + Index; 2340 } 2341 if (LMUL == RISCVII::VLMUL::LMUL_4) { 2342 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 2343 "Unexpected subreg numbering"); 2344 return RISCV::sub_vrm4_0 + Index; 2345 } 2346 llvm_unreachable("Invalid vector type."); 2347} 2348 2349unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 2350 if (VT.getVectorElementType() == MVT::i1) 2351 return RISCV::VRRegClassID; 2352 return getRegClassIDForLMUL(getLMUL(VT)); 2353} 2354 2355// Attempt to decompose a subvector insert/extract between VecVT and 2356// SubVecVT via subregister indices. Returns the subregister index that 2357// can perform the subvector insert/extract with the given element index, as 2358// well as the index corresponding to any leftover subvectors that must be 2359// further inserted/extracted within the register class for SubVecVT. 
2360 std::pair<unsigned, unsigned> 2361 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2362 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 2363 const RISCVRegisterInfo *TRI) { 2364 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 2365 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 2366 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 2367 "Register classes not ordered"); 2368 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 2369 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 2370 // Try to compose a subregister index that takes us from the incoming 2371 // LMUL>1 register class down to the outgoing one. At each step we halve 2372 // the LMUL: 2373 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 2374 // Note that this is not guaranteed to find a subregister index, such as 2375 // when we are extracting from one VR type to another. 2376 unsigned SubRegIdx = RISCV::NoSubRegister; 2377 for (const unsigned RCID : 2378 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 2379 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 2380 VecVT = VecVT.getHalfNumVectorElementsVT(); 2381 bool IsHi = 2382 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 2383 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 2384 getSubregIndexByMVT(VecVT, IsHi)); 2385 if (IsHi) 2386 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 2387 } 2388 return {SubRegIdx, InsertExtractIdx}; 2389} 2390 2391// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 2392// stores for those types. 2393bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 2394 return !Subtarget.useRVVForFixedLengthVectors() || 2395 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 2396} 2397 2398bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { 2399 if (!ScalarTy.isSimple()) 2400 return false; 2401 switch (ScalarTy.getSimpleVT().SimpleTy) { 2402 case MVT::iPTR: 2403 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; 2404 case MVT::i8: 2405 case MVT::i16: 2406 case MVT::i32: 2407 return true; 2408 case MVT::i64: 2409 return Subtarget.hasVInstructionsI64(); 2410 case MVT::f16: 2411 return Subtarget.hasVInstructionsF16(); 2412 case MVT::f32: 2413 return Subtarget.hasVInstructionsF32(); 2414 case MVT::f64: 2415 return Subtarget.hasVInstructionsF64(); 2416 default: 2417 return false; 2418 } 2419} 2420 2421 2422unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { 2423 return NumRepeatedDivisors; 2424} 2425 2426static SDValue getVLOperand(SDValue Op) { 2427 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 2428 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 2429 "Unexpected opcode"); 2430 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 2431 unsigned IntNo = Op.getConstantOperandVal(HasChain ?
1 : 0); 2432 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 2433 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 2434 if (!II) 2435 return SDValue(); 2436 return Op.getOperand(II->VLOperand + 1 + HasChain); 2437} 2438 2439static bool useRVVForFixedLengthVectorVT(MVT VT, 2440 const RISCVSubtarget &Subtarget) { 2441 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 2442 if (!Subtarget.useRVVForFixedLengthVectors()) 2443 return false; 2444 2445 // We only support a set of vector types with a consistent maximum fixed size 2446 // across all supported vector element types to avoid legalization issues. 2447 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 2448 // fixed-length vector type we support is 1024 bytes. 2449 if (VT.getFixedSizeInBits() > 1024 * 8) 2450 return false; 2451 2452 unsigned MinVLen = Subtarget.getRealMinVLen(); 2453 2454 MVT EltVT = VT.getVectorElementType(); 2455 2456 // Don't use RVV for vectors we cannot scalarize if required. 2457 switch (EltVT.SimpleTy) { 2458 // i1 is supported but has different rules. 2459 default: 2460 return false; 2461 case MVT::i1: 2462 // Masks can only use a single register. 2463 if (VT.getVectorNumElements() > MinVLen) 2464 return false; 2465 MinVLen /= 8; 2466 break; 2467 case MVT::i8: 2468 case MVT::i16: 2469 case MVT::i32: 2470 break; 2471 case MVT::i64: 2472 if (!Subtarget.hasVInstructionsI64()) 2473 return false; 2474 break; 2475 case MVT::f16: 2476 if (!Subtarget.hasVInstructionsF16Minimal()) 2477 return false; 2478 break; 2479 case MVT::f32: 2480 if (!Subtarget.hasVInstructionsF32()) 2481 return false; 2482 break; 2483 case MVT::f64: 2484 if (!Subtarget.hasVInstructionsF64()) 2485 return false; 2486 break; 2487 } 2488 2489 // Reject elements larger than ELEN. 2490 if (EltVT.getSizeInBits() > Subtarget.getELen()) 2491 return false; 2492 2493 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 2494 // Don't use RVV for types that don't fit. 2495 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 2496 return false; 2497 2498 // TODO: Perhaps an artificial restriction, but worth having whilst getting 2499 // the base fixed length RVV support in place. 2500 if (!VT.isPow2VectorType()) 2501 return false; 2502 2503 return true; 2504} 2505 2506bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 2507 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 2508} 2509 2510// Return the largest legal scalable vector type that matches VT's element type. 2511static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 2512 const RISCVSubtarget &Subtarget) { 2513 // This may be called before legal types are setup. 2514 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 2515 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 2516 "Expected legal fixed length vector!"); 2517 2518 unsigned MinVLen = Subtarget.getRealMinVLen(); 2519 unsigned MaxELen = Subtarget.getELen(); 2520 2521 MVT EltVT = VT.getVectorElementType(); 2522 switch (EltVT.SimpleTy) { 2523 default: 2524 llvm_unreachable("unexpected element type for RVV container"); 2525 case MVT::i1: 2526 case MVT::i8: 2527 case MVT::i16: 2528 case MVT::i32: 2529 case MVT::i64: 2530 case MVT::f16: 2531 case MVT::f32: 2532 case MVT::f64: { 2533 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 2534 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within 2535 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 
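  // For example, with a minimum VLEN of 128 (and assuming ELEN=64), a fixed
  // v4i32 (128 bits) maps to the container nxv2i32, which is exactly one
  // vector register (LMUL=1), while v2i32 maps to nxv1i32 (fractional
  // LMUL=1/2).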
2536 unsigned NumElts = 2537 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; 2538 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); 2539 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); 2540 return MVT::getScalableVectorVT(EltVT, NumElts); 2541 } 2542 } 2543} 2544 2545static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 2546 const RISCVSubtarget &Subtarget) { 2547 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 2548 Subtarget); 2549} 2550 2551MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 2552 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); 2553} 2554 2555// Grow V to consume an entire RVV register. 2556static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2557 const RISCVSubtarget &Subtarget) { 2558 assert(VT.isScalableVector() && 2559 "Expected to convert into a scalable vector!"); 2560 assert(V.getValueType().isFixedLengthVector() && 2561 "Expected a fixed length vector operand!"); 2562 SDLoc DL(V); 2563 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2564 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 2565} 2566 2567// Shrink V so it's just big enough to maintain a VT's worth of data. 2568static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 2569 const RISCVSubtarget &Subtarget) { 2570 assert(VT.isFixedLengthVector() && 2571 "Expected to convert into a fixed length vector!"); 2572 assert(V.getValueType().isScalableVector() && 2573 "Expected a scalable vector operand!"); 2574 SDLoc DL(V); 2575 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 2576 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 2577} 2578 2579/// Return the mask type suitable for masking the provided 2580/// vector type. This is simply an i1 element type vector of the same 2581/// (possibly scalable) length. 2582static MVT getMaskTypeFor(MVT VecVT) { 2583 assert(VecVT.isVector()); 2584 ElementCount EC = VecVT.getVectorElementCount(); 2585 return MVT::getVectorVT(MVT::i1, EC); 2586} 2587 2588/// Creates an all ones mask suitable for masking a vector of type VecTy with 2589/// vector length VL. 2590static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, 2591 SelectionDAG &DAG) { 2592 MVT MaskVT = getMaskTypeFor(VecVT); 2593 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 2594} 2595 2596static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2597 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2598 // If we know the exact VLEN, and our VL is exactly equal to VLMAX, 2599 // canonicalize the representation. InsertVSETVLI will pick the immediate 2600 // encoding later if profitable.
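  // For example, if VLEN is known to be exactly 128 and ContainerVT is
  // nxv4i32, then VLMAX is 8; a request for NumElts == 8 is emitted as the
  // X0 (VLMAX) sentinel below rather than as an immediate of 8.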
2601 const auto [MinVLMAX, MaxVLMAX] = 2602 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 2603 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) 2604 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2605 2606 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); 2607} 2608 2609static std::pair<SDValue, SDValue> 2610getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, 2611 const RISCVSubtarget &Subtarget) { 2612 assert(VecVT.isScalableVector() && "Expecting a scalable vector"); 2613 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); 2614 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); 2615 return {Mask, VL}; 2616} 2617 2618static std::pair<SDValue, SDValue> 2619getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, 2620 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 2621 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2622 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); 2623 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); 2624 return {Mask, VL}; 2625} 2626 2627// Gets the two common "VL" operands: an all-ones mask and the vector length. 2628// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 2629// the vector type that the fixed-length vector is contained in. Otherwise if 2630// VecVT is scalable, then ContainerVT should be the same as VecVT. 2631static std::pair<SDValue, SDValue> 2632getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, 2633 const RISCVSubtarget &Subtarget) { 2634 if (VecVT.isFixedLengthVector()) 2635 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG, 2636 Subtarget); 2637 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 2638 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget); 2639} 2640 2641SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, 2642 SelectionDAG &DAG) const { 2643 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2644 return DAG.getElementCount(DL, Subtarget.getXLenVT(), 2645 VecVT.getVectorElementCount()); 2646} 2647 2648std::pair<unsigned, unsigned> 2649RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, 2650 const RISCVSubtarget &Subtarget) { 2651 assert(VecVT.isScalableVector() && "Expected scalable vector"); 2652 2653 unsigned EltSize = VecVT.getScalarSizeInBits(); 2654 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 2655 2656 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 2657 unsigned MaxVLMAX = 2658 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 2659 2660 unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 2661 unsigned MinVLMAX = 2662 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); 2663 2664 return std::make_pair(MinVLMAX, MaxVLMAX); 2665} 2666 2667// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few 2668// of either is (currently) supported. This can get us into an infinite loop 2669// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR 2670// as a ..., etc. 2671// Until either (or both) of these can reliably lower any node, reporting that 2672// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks 2673// the infinite loop. Note that this lowers BUILD_VECTOR through the stack, 2674// which is not desirable. 
2675 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( 2676 EVT VT, unsigned DefinedValues) const { 2677 return false; 2678} 2679 2680InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { 2681 // TODO: Here we assume reciprocal throughput is 1 for LMUL_1; it is 2682 // implementation-defined. 2683 if (!VT.isVector()) 2684 return InstructionCost::getInvalid(); 2685 unsigned DLenFactor = Subtarget.getDLenFactor(); 2686 unsigned Cost; 2687 if (VT.isScalableVector()) { 2688 unsigned LMul; 2689 bool Fractional; 2690 std::tie(LMul, Fractional) = 2691 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); 2692 if (Fractional) 2693 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; 2694 else 2695 Cost = (LMul * DLenFactor); 2696 } else { 2697 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor); 2698 } 2699 return Cost; 2700} 2701 2702 2703/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv 2704/// is generally quadratic in the number of vregs implied by LMUL. Note that 2705/// the operands (index and possibly mask) are handled separately. 2706InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { 2707 return getLMULCost(VT) * getLMULCost(VT); 2708} 2709 2710/// Return the cost of a vrgather.vi (or vx) instruction for the type VT. 2711/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, 2712/// or may track the vrgather.vv cost. It is implementation-dependent. 2713InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { 2714 return getLMULCost(VT); 2715} 2716 2717/// Return the cost of a vslidedown.vx or vslideup.vx instruction 2718/// for the type VT. (This does not cover the vslide1up or vslide1down 2719/// variants.) Slides may be linear in the number of vregs implied by LMUL, 2720/// or may track the vrgather.vv cost. It is implementation-dependent. 2721InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { 2722 return getLMULCost(VT); 2723} 2724 2725/// Return the cost of a vslidedown.vi or vslideup.vi instruction 2726/// for the type VT. (This does not cover the vslide1up or vslide1down 2727/// variants.) Slides may be linear in the number of vregs implied by LMUL, 2728/// or may track the vrgather.vv cost. It is implementation-dependent. 2729InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { 2730 return getLMULCost(VT); 2731} 2732 2733static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, 2734 const RISCVSubtarget &Subtarget) { 2735 // RISC-V FP-to-int conversions saturate to the destination register size, but 2736 // don't produce 0 for nan. We can use a conversion instruction and fix the 2737 // nan case with a compare and a select. 2738 SDValue Src = Op.getOperand(0); 2739 2740 MVT DstVT = Op.getSimpleValueType(); 2741 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2742 2743 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; 2744 2745 if (!DstVT.isVector()) { 2746 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate 2747 // the result. 2748 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || 2749 Src.getValueType() == MVT::bf16) { 2750 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); 2751 } 2752 2753 unsigned Opc; 2754 if (SatVT == DstVT) 2755 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 2756 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 2757 Opc = IsSigned ?
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 2758 else 2759 return SDValue(); 2760 // FIXME: Support other SatVTs by clamping before or after the conversion. 2761 2762 SDLoc DL(Op); 2763 SDValue FpToInt = DAG.getNode( 2764 Opc, DL, DstVT, Src, 2765 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT())); 2766 2767 if (Opc == RISCVISD::FCVT_WU_RV64) 2768 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 2769 2770 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 2771 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, 2772 ISD::CondCode::SETUO); 2773 } 2774 2775 // Vectors. 2776 2777 MVT DstEltVT = DstVT.getVectorElementType(); 2778 MVT SrcVT = Src.getSimpleValueType(); 2779 MVT SrcEltVT = SrcVT.getVectorElementType(); 2780 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 2781 unsigned DstEltSize = DstEltVT.getSizeInBits(); 2782 2783 // Only handle saturating to the destination type. 2784 if (SatVT != DstEltVT) 2785 return SDValue(); 2786 2787 // FIXME: Don't support narrowing by more than 1 steps for now. 2788 if (SrcEltSize > (2 * DstEltSize)) 2789 return SDValue(); 2790 2791 MVT DstContainerVT = DstVT; 2792 MVT SrcContainerVT = SrcVT; 2793 if (DstVT.isFixedLengthVector()) { 2794 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget); 2795 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 2796 assert(DstContainerVT.getVectorElementCount() == 2797 SrcContainerVT.getVectorElementCount() && 2798 "Expected same element count"); 2799 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2800 } 2801 2802 SDLoc DL(Op); 2803 2804 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget); 2805 2806 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 2807 {Src, Src, DAG.getCondCode(ISD::SETNE), 2808 DAG.getUNDEF(Mask.getValueType()), Mask, VL}); 2809 2810 // Need to widen by more than 1 step, promote the FP type, then do a widening 2811 // convert. 2812 if (DstEltSize > (2 * SrcEltSize)) { 2813 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!"); 2814 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); 2815 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL); 2816 } 2817 2818 unsigned RVVOpc = 2819 IsSigned ? 
RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 2820 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL); 2821 2822 SDValue SplatZero = DAG.getNode( 2823 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), 2824 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 2825 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero, 2826 Res, DAG.getUNDEF(DstContainerVT), VL); 2827 2828 if (DstVT.isFixedLengthVector()) 2829 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); 2830 2831 return Res; 2832} 2833 2834static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { 2835 switch (Opc) { 2836 case ISD::FROUNDEVEN: 2837 case ISD::STRICT_FROUNDEVEN: 2838 case ISD::VP_FROUNDEVEN: 2839 return RISCVFPRndMode::RNE; 2840 case ISD::FTRUNC: 2841 case ISD::STRICT_FTRUNC: 2842 case ISD::VP_FROUNDTOZERO: 2843 return RISCVFPRndMode::RTZ; 2844 case ISD::FFLOOR: 2845 case ISD::STRICT_FFLOOR: 2846 case ISD::VP_FFLOOR: 2847 return RISCVFPRndMode::RDN; 2848 case ISD::FCEIL: 2849 case ISD::STRICT_FCEIL: 2850 case ISD::VP_FCEIL: 2851 return RISCVFPRndMode::RUP; 2852 case ISD::FROUND: 2853 case ISD::STRICT_FROUND: 2854 case ISD::VP_FROUND: 2855 return RISCVFPRndMode::RMM; 2856 case ISD::FRINT: 2857 return RISCVFPRndMode::DYN; 2858 } 2859 2860 return RISCVFPRndMode::Invalid; 2861} 2862 2863// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND 2864// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to 2865// the integer domain and back. Taking care to avoid converting values that are 2866// nan or already correct. 2867static SDValue 2868lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2869 const RISCVSubtarget &Subtarget) { 2870 MVT VT = Op.getSimpleValueType(); 2871 assert(VT.isVector() && "Unexpected type"); 2872 2873 SDLoc DL(Op); 2874 2875 SDValue Src = Op.getOperand(0); 2876 2877 MVT ContainerVT = VT; 2878 if (VT.isFixedLengthVector()) { 2879 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2880 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2881 } 2882 2883 SDValue Mask, VL; 2884 if (Op->isVPOpcode()) { 2885 Mask = Op.getOperand(1); 2886 if (VT.isFixedLengthVector()) 2887 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 2888 Subtarget); 2889 VL = Op.getOperand(2); 2890 } else { 2891 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2892 } 2893 2894 // Freeze the source since we are increasing the number of uses. 2895 Src = DAG.getFreeze(Src); 2896 2897 // We do the conversion on the absolute value and fix the sign at the end. 2898 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 2899 2900 // Determine the largest integer that can be represented exactly. This and 2901 // values larger than it don't have any fractional bits so don't need to 2902 // be converted. 2903 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 2904 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2905 APFloat MaxVal = APFloat(FltSem); 2906 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2907 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2908 SDValue MaxValNode = 2909 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 2910 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 2911 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 2912 2913 // If abs(Src) was larger than MaxVal or nan, keep it. 
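  // (For f32 the threshold MaxVal computed above is 2^23; every value with
  // magnitude at least 2^23 already has no fractional bits and is left
  // unchanged.)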
2914 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2915 Mask = 2916 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, 2917 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), 2918 Mask, Mask, VL}); 2919 2920 // Truncate to integer and convert back to FP. 2921 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 2922 MVT XLenVT = Subtarget.getXLenVT(); 2923 SDValue Truncated; 2924 2925 switch (Op.getOpcode()) { 2926 default: 2927 llvm_unreachable("Unexpected opcode"); 2928 case ISD::FCEIL: 2929 case ISD::VP_FCEIL: 2930 case ISD::FFLOOR: 2931 case ISD::VP_FFLOOR: 2932 case ISD::FROUND: 2933 case ISD::FROUNDEVEN: 2934 case ISD::VP_FROUND: 2935 case ISD::VP_FROUNDEVEN: 2936 case ISD::VP_FROUNDTOZERO: { 2937 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2938 assert(FRM != RISCVFPRndMode::Invalid); 2939 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, 2940 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 2941 break; 2942 } 2943 case ISD::FTRUNC: 2944 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, 2945 Mask, VL); 2946 break; 2947 case ISD::FRINT: 2948 case ISD::VP_FRINT: 2949 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); 2950 break; 2951 case ISD::FNEARBYINT: 2952 case ISD::VP_FNEARBYINT: 2953 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, 2954 Mask, VL); 2955 break; 2956 } 2957 2958 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 2959 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) 2960 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, 2961 Mask, VL); 2962 2963 // Restore the original sign so that -0.0 is preserved. 2964 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 2965 Src, Src, Mask, VL); 2966 2967 if (!VT.isFixedLengthVector()) 2968 return Truncated; 2969 2970 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 2971} 2972 2973// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, 2974// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to 2975// qNaN and converting the new source to integer and back to FP. 2976static SDValue 2977lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2978 const RISCVSubtarget &Subtarget) { 2979 SDLoc DL(Op); 2980 MVT VT = Op.getSimpleValueType(); 2981 SDValue Chain = Op.getOperand(0); 2982 SDValue Src = Op.getOperand(1); 2983 2984 MVT ContainerVT = VT; 2985 if (VT.isFixedLengthVector()) { 2986 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2987 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2988 } 2989 2990 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2991 2992 // Freeze the source since we are increasing the number of uses. 2993 Src = DAG.getFreeze(Src); 2994 2995 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src. 2996 MVT MaskVT = Mask.getSimpleValueType(); 2997 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL, 2998 DAG.getVTList(MaskVT, MVT::Other), 2999 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE), 3000 DAG.getUNDEF(MaskVT), Mask, VL}); 3001 Chain = Unorder.getValue(1); 3002 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, 3003 DAG.getVTList(ContainerVT, MVT::Other), 3004 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL}); 3005 Chain = Src.getValue(1); 3006 3007 // We do the conversion on the absolute value and fix the sign at the end.
3008 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 3009 3010 // Determine the largest integer that can be represented exactly. This and 3011 // values larger than it don't have any fractional bits so don't need to 3012 // be converted. 3013 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 3014 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3015 APFloat MaxVal = APFloat(FltSem); 3016 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3017 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3018 SDValue MaxValNode = 3019 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 3020 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 3021 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 3022 3023 // If abs(Src) was larger than MaxVal or nan, keep it. 3024 Mask = DAG.getNode( 3025 RISCVISD::SETCC_VL, DL, MaskVT, 3026 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL}); 3027 3028 // Truncate to integer and convert back to FP. 3029 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 3030 MVT XLenVT = Subtarget.getXLenVT(); 3031 SDValue Truncated; 3032 3033 switch (Op.getOpcode()) { 3034 default: 3035 llvm_unreachable("Unexpected opcode"); 3036 case ISD::STRICT_FCEIL: 3037 case ISD::STRICT_FFLOOR: 3038 case ISD::STRICT_FROUND: 3039 case ISD::STRICT_FROUNDEVEN: { 3040 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3041 assert(FRM != RISCVFPRndMode::Invalid); 3042 Truncated = DAG.getNode( 3043 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), 3044 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL}); 3045 break; 3046 } 3047 case ISD::STRICT_FTRUNC: 3048 Truncated = 3049 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, 3050 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); 3051 break; 3052 case ISD::STRICT_FNEARBYINT: 3053 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, 3054 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, 3055 Mask, VL); 3056 break; 3057 } 3058 Chain = Truncated.getValue(1); 3059 3060 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 3061 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { 3062 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, 3063 DAG.getVTList(ContainerVT, MVT::Other), Chain, 3064 Truncated, Mask, VL); 3065 Chain = Truncated.getValue(1); 3066 } 3067 3068 // Restore the original sign so that -0.0 is preserved. 3069 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 3070 Src, Src, Mask, VL); 3071 3072 if (VT.isFixedLengthVector()) 3073 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3074 return DAG.getMergeValues({Truncated, Chain}, DL); 3075} 3076 3077static SDValue 3078lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 3079 const RISCVSubtarget &Subtarget) { 3080 MVT VT = Op.getSimpleValueType(); 3081 if (VT.isVector()) 3082 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 3083 3084 if (DAG.shouldOptForSize()) 3085 return SDValue(); 3086 3087 SDLoc DL(Op); 3088 SDValue Src = Op.getOperand(0); 3089 3090 // Create an integer the size of the mantissa with the MSB set. This and all 3091 // values larger than it don't have any fractional bits so don't need to be 3092 // converted. 
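  // For example, with f32 (24-bit precision) MaxVal below is 2^23 = 8388608.0,
  // and with f64 (53-bit precision) it is 2^52; any value whose magnitude is
  // at least MaxVal already has no fractional bits.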
3093 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 3094 unsigned Precision = APFloat::semanticsPrecision(FltSem); 3095 APFloat MaxVal = APFloat(FltSem); 3096 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 3097 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 3098 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 3099 3100 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 3101 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, 3102 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); 3103} 3104 3105// Expand vector LRINT and LLRINT by converting to the integer domain. 3106static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, 3107 const RISCVSubtarget &Subtarget) { 3108 MVT VT = Op.getSimpleValueType(); 3109 assert(VT.isVector() && "Unexpected type"); 3110 3111 SDLoc DL(Op); 3112 SDValue Src = Op.getOperand(0); 3113 MVT ContainerVT = VT; 3114 3115 if (VT.isFixedLengthVector()) { 3116 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3117 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3118 } 3119 3120 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3121 SDValue Truncated = 3122 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); 3123 3124 if (!VT.isFixedLengthVector()) 3125 return Truncated; 3126 3127 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 3128} 3129 3130static SDValue 3131getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, 3132 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, 3133 SDValue Offset, SDValue Mask, SDValue VL, 3134 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3135 if (Merge.isUndef()) 3136 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3137 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3138 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3139 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); 3140} 3141 3142static SDValue 3143getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, 3144 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 3145 SDValue VL, 3146 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3147 if (Merge.isUndef()) 3148 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3149 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3150 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3151 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); 3152} 3153 3154static MVT getLMUL1VT(MVT VT) { 3155 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 3156 "Unexpected vector MVT"); 3157 return MVT::getScalableVectorVT( 3158 VT.getVectorElementType(), 3159 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 3160} 3161 3162struct VIDSequence { 3163 int64_t StepNumerator; 3164 unsigned StepDenominator; 3165 int64_t Addend; 3166}; 3167 3168static std::optional<uint64_t> getExactInteger(const APFloat &APF, 3169 uint32_t BitWidth) { 3170 APSInt ValInt(BitWidth, !APF.isNegative()); 3171 // We use an arbitrary rounding mode here. If a floating-point is an exact 3172 // integer (e.g., 1.0), the rounding mode does not affect the output value. If 3173 // the rounding mode changes the output value, then it is not an exact 3174 // integer. 
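  // For example, 3.0 converts with IsExact == true under any rounding mode,
  // whereas 3.5 is rounded (to 3 under TowardZero), so IsExact is false and we
  // return std::nullopt.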
3175 RoundingMode ArbitraryRM = RoundingMode::TowardZero; 3176 bool IsExact; 3177 // If it is out of signed integer range, it will return an invalid operation. 3178 // If it is not an exact integer, IsExact is false. 3179 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == 3180 APFloatBase::opInvalidOp) || 3181 !IsExact) 3182 return std::nullopt; 3183 return ValInt.extractBitsAsZExtValue(BitWidth, 0); 3184} 3185 3186// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 3187// to the (non-zero) step S and start value X. This can be then lowered as the 3188// RVV sequence (VID * S) + X, for example. 3189// The step S is represented as an integer numerator divided by a positive 3190// denominator. Note that the implementation currently only identifies 3191// sequences in which either the numerator is +/- 1 or the denominator is 1. It 3192// cannot detect 2/3, for example. 3193// Note that this method will also match potentially unappealing index 3194// sequences, like <i32 0, i32 50939494>, however it is left to the caller to 3195// determine whether this is worth generating code for. 3196static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op, 3197 unsigned EltSizeInBits) { 3198 unsigned NumElts = Op.getNumOperands(); 3199 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); 3200 bool IsInteger = Op.getValueType().isInteger(); 3201 3202 std::optional<unsigned> SeqStepDenom; 3203 std::optional<int64_t> SeqStepNum, SeqAddend; 3204 std::optional<std::pair<uint64_t, unsigned>> PrevElt; 3205 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits()); 3206 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 3207 // Assume undef elements match the sequence; we just have to be careful 3208 // when interpolating across them. 3209 if (Op.getOperand(Idx).isUndef()) 3210 continue; 3211 3212 uint64_t Val; 3213 if (IsInteger) { 3214 // The BUILD_VECTOR must be all constants. 3215 if (!isa<ConstantSDNode>(Op.getOperand(Idx))) 3216 return std::nullopt; 3217 Val = Op.getConstantOperandVal(Idx) & 3218 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits()); 3219 } else { 3220 // The BUILD_VECTOR must be all constants. 3221 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx))) 3222 return std::nullopt; 3223 if (auto ExactInteger = getExactInteger( 3224 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 3225 Op.getScalarValueSizeInBits())) 3226 Val = *ExactInteger; 3227 else 3228 return std::nullopt; 3229 } 3230 3231 if (PrevElt) { 3232 // Calculate the step since the last non-undef element, and ensure 3233 // it's consistent across the entire sequence. 3234 unsigned IdxDiff = Idx - PrevElt->second; 3235 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); 3236 3237 // A zero-value value difference means that we're somewhere in the middle 3238 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a 3239 // step change before evaluating the sequence. 3240 if (ValDiff == 0) 3241 continue; 3242 3243 int64_t Remainder = ValDiff % IdxDiff; 3244 // Normalize the step if it's greater than 1. 3245 if (Remainder != ValDiff) { 3246 // The difference must cleanly divide the element span. 
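        // For example, <0, 2, 4, 6> normalizes ValDiff == 2 over IdxDiff == 1
        // to a step of 2/1, <0, 0, 1, 1> skips normalization and keeps the
        // fractional step 1/2, and <0, undef, 3> is rejected just below since
        // ValDiff == 3 does not divide evenly over its two-element span.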
3247 if (Remainder != 0) 3248 return std::nullopt; 3249 ValDiff /= IdxDiff; 3250 IdxDiff = 1; 3251 } 3252 3253 if (!SeqStepNum) 3254 SeqStepNum = ValDiff; 3255 else if (ValDiff != SeqStepNum) 3256 return std::nullopt; 3257 3258 if (!SeqStepDenom) 3259 SeqStepDenom = IdxDiff; 3260 else if (IdxDiff != *SeqStepDenom) 3261 return std::nullopt; 3262 } 3263 3264 // Record this non-undef element for later. 3265 if (!PrevElt || PrevElt->first != Val) 3266 PrevElt = std::make_pair(Val, Idx); 3267 } 3268 3269 // We need to have logged a step for this to count as a legal index sequence. 3270 if (!SeqStepNum || !SeqStepDenom) 3271 return std::nullopt; 3272 3273 // Loop back through the sequence and validate elements we might have skipped 3274 // while waiting for a valid step. While doing this, log any sequence addend. 3275 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 3276 if (Op.getOperand(Idx).isUndef()) 3277 continue; 3278 uint64_t Val; 3279 if (IsInteger) { 3280 Val = Op.getConstantOperandVal(Idx) & 3281 maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits()); 3282 } else { 3283 Val = *getExactInteger( 3284 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 3285 Op.getScalarValueSizeInBits()); 3286 } 3287 uint64_t ExpectedVal = 3288 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 3289 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); 3290 if (!SeqAddend) 3291 SeqAddend = Addend; 3292 else if (Addend != SeqAddend) 3293 return std::nullopt; 3294 } 3295 3296 assert(SeqAddend && "Must have an addend if we have a step"); 3297 3298 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 3299} 3300 3301// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT 3302// and lower it as a VRGATHER_VX_VL from the source vector. 3303static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, 3304 SelectionDAG &DAG, 3305 const RISCVSubtarget &Subtarget) { 3306 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 3307 return SDValue(); 3308 SDValue Vec = SplatVal.getOperand(0); 3309 // Only perform this optimization on vectors of the same size for simplicity. 3310 // Don't perform this optimization for i1 vectors. 3311 // FIXME: Support i1 vectors, maybe by promoting to i8? 3312 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) 3313 return SDValue(); 3314 SDValue Idx = SplatVal.getOperand(1); 3315 // The index must be a legal type. 3316 if (Idx.getValueType() != Subtarget.getXLenVT()) 3317 return SDValue(); 3318 3319 MVT ContainerVT = VT; 3320 if (VT.isFixedLengthVector()) { 3321 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3322 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3323 } 3324 3325 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3326 3327 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, 3328 Idx, DAG.getUNDEF(ContainerVT), Mask, VL); 3329 3330 if (!VT.isFixedLengthVector()) 3331 return Gather; 3332 3333 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 3334} 3335 3336 3337/// Try and optimize BUILD_VECTORs with "dominant values" - these are values 3338/// which constitute a large proportion of the elements. In such cases we can 3339/// splat a vector with the dominant element and make up the shortfall with 3340/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable. 3341/// Note that this includes vectors of 2 elements by association. 
The 3342/// upper-most element is the "dominant" one, allowing us to use a splat to 3343/// "insert" the upper element, and an insert of the lower element at position 3344/// 0, which improves codegen. 3345static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, 3346 const RISCVSubtarget &Subtarget) { 3347 MVT VT = Op.getSimpleValueType(); 3348 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3349 3350 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3351 3352 SDLoc DL(Op); 3353 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3354 3355 MVT XLenVT = Subtarget.getXLenVT(); 3356 unsigned NumElts = Op.getNumOperands(); 3357 3358 SDValue DominantValue; 3359 unsigned MostCommonCount = 0; 3360 DenseMap<SDValue, unsigned> ValueCounts; 3361 unsigned NumUndefElts = 3362 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); 3363 3364 // Track the number of scalar loads we know we'd be inserting, estimated as 3365 // any non-zero floating-point constant. Other kinds of element are either 3366 // already in registers or are materialized on demand. The threshold at which 3367 // a vector load is more desirable than several scalar materializion and 3368 // vector-insertion instructions is not known. 3369 unsigned NumScalarLoads = 0; 3370 3371 for (SDValue V : Op->op_values()) { 3372 if (V.isUndef()) 3373 continue; 3374 3375 ValueCounts.insert(std::make_pair(V, 0)); 3376 unsigned &Count = ValueCounts[V]; 3377 if (0 == Count) 3378 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) 3379 NumScalarLoads += !CFP->isExactlyValue(+0.0); 3380 3381 // Is this value dominant? In case of a tie, prefer the highest element as 3382 // it's cheaper to insert near the beginning of a vector than it is at the 3383 // end. 3384 if (++Count >= MostCommonCount) { 3385 DominantValue = V; 3386 MostCommonCount = Count; 3387 } 3388 } 3389 3390 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); 3391 unsigned NumDefElts = NumElts - NumUndefElts; 3392 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; 3393 3394 // Don't perform this optimization when optimizing for size, since 3395 // materializing elements and inserting them tends to cause code bloat. 3396 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && 3397 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && 3398 ((MostCommonCount > DominantValueCountThreshold) || 3399 (ValueCounts.size() <= Log2_32(NumDefElts)))) { 3400 // Start by splatting the most common element. 3401 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); 3402 3403 DenseSet<SDValue> Processed{DominantValue}; 3404 3405 // We can handle an insert into the last element (of a splat) via 3406 // v(f)slide1down. This is slightly better than the vslideup insert 3407 // lowering as it avoids the need for a vector group temporary. It 3408 // is also better than using vmerge.vx as it avoids the need to 3409 // materialize the mask in a vector register. 3410 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1); 3411 !LastOp.isUndef() && ValueCounts[LastOp] == 1 && 3412 LastOp != DominantValue) { 3413 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3414 auto OpCode = 3415 VT.isFloatingPoint() ? 
RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3416 if (!VT.isFloatingPoint()) 3417 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp); 3418 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3419 LastOp, Mask, VL); 3420 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget); 3421 Processed.insert(LastOp); 3422 } 3423 3424 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); 3425 for (const auto &OpIdx : enumerate(Op->ops())) { 3426 const SDValue &V = OpIdx.value(); 3427 if (V.isUndef() || !Processed.insert(V).second) 3428 continue; 3429 if (ValueCounts[V] == 1) { 3430 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, 3431 DAG.getConstant(OpIdx.index(), DL, XLenVT)); 3432 } else { 3433 // Blend in all instances of this value using a VSELECT, using a 3434 // mask where each bit signals whether that element is the one 3435 // we're after. 3436 SmallVector<SDValue> Ops; 3437 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { 3438 return DAG.getConstant(V == V1, DL, XLenVT); 3439 }); 3440 Vec = DAG.getNode(ISD::VSELECT, DL, VT, 3441 DAG.getBuildVector(SelMaskTy, DL, Ops), 3442 DAG.getSplatBuildVector(VT, DL, V), Vec); 3443 } 3444 } 3445 3446 return Vec; 3447 } 3448 3449 return SDValue(); 3450} 3451 3452static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, 3453 const RISCVSubtarget &Subtarget) { 3454 MVT VT = Op.getSimpleValueType(); 3455 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3456 3457 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3458 3459 SDLoc DL(Op); 3460 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3461 3462 MVT XLenVT = Subtarget.getXLenVT(); 3463 unsigned NumElts = Op.getNumOperands(); 3464 3465 if (VT.getVectorElementType() == MVT::i1) { 3466 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 3467 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 3468 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 3469 } 3470 3471 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 3472 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 3473 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 3474 } 3475 3476 // Lower constant mask BUILD_VECTORs via an integer vector type, in 3477 // scalar integer chunks whose bit-width depends on the number of mask 3478 // bits and XLEN. 3479 // First, determine the most appropriate scalar integer type to use. This 3480 // is at most XLenVT, but may be shrunk to a smaller vector element type 3481 // according to the size of the final vector - use i8 chunks rather than 3482 // XLenVT if we're producing a v8i1. This results in more consistent 3483 // codegen across RV32 and RV64. 3484 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); 3485 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); 3486 // If we have to use more than one INSERT_VECTOR_ELT then this 3487 // optimization is likely to increase code size; avoid peforming it in 3488 // such a case. We can use a load from a constant pool in this case. 3489 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) 3490 return SDValue(); 3491 // Now we can create our integer vector type. Note that it may be larger 3492 // than the resulting mask type: v4i1 would use v1i8 as its integer type. 
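    // For example, a constant v16i1 mask becomes a v1i16 build_vector holding
    // all 16 mask bits in a single scalar, while a constant v64i1 mask with a
    // 32-bit ELEN becomes a v2i32 build_vector of two 32-bit chunks.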
3493 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); 3494 MVT IntegerViaVecVT = 3495 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), 3496 IntegerViaVecElts); 3497 3498 uint64_t Bits = 0; 3499 unsigned BitPos = 0, IntegerEltIdx = 0; 3500 SmallVector<SDValue, 8> Elts(IntegerViaVecElts); 3501 3502 for (unsigned I = 0; I < NumElts;) { 3503 SDValue V = Op.getOperand(I); 3504 bool BitValue = !V.isUndef() && V->getAsZExtVal(); 3505 Bits |= ((uint64_t)BitValue << BitPos); 3506 ++BitPos; 3507 ++I; 3508 3509 // Once we accumulate enough bits to fill our scalar type or process the 3510 // last element, insert into our vector and clear our accumulated data. 3511 if (I % NumViaIntegerBits == 0 || I == NumElts) { 3512 if (NumViaIntegerBits <= 32) 3513 Bits = SignExtend64<32>(Bits); 3514 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); 3515 Elts[IntegerEltIdx] = Elt; 3516 Bits = 0; 3517 BitPos = 0; 3518 IntegerEltIdx++; 3519 } 3520 } 3521 3522 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); 3523 3524 if (NumElts < NumViaIntegerBits) { 3525 // If we're producing a smaller vector than our minimum legal integer 3526 // type, bitcast to the equivalent (known-legal) mask type, and extract 3527 // our final mask. 3528 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); 3529 Vec = DAG.getBitcast(MVT::v8i1, Vec); 3530 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, 3531 DAG.getConstant(0, DL, XLenVT)); 3532 } else { 3533 // Else we must have produced an integer type with the same size as the 3534 // mask type; bitcast for the final result. 3535 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); 3536 Vec = DAG.getBitcast(VT, Vec); 3537 } 3538 3539 return Vec; 3540 } 3541 3542 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3543 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 3544 : RISCVISD::VMV_V_X_VL; 3545 if (!VT.isFloatingPoint()) 3546 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 3547 Splat = 3548 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 3549 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3550 } 3551 3552 // Try and match index sequences, which we can lower to the vid instruction 3553 // with optional modifications. An all-undef vector is matched by 3554 // getSplatValue, above. 3555 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) { 3556 int64_t StepNumerator = SimpleVID->StepNumerator; 3557 unsigned StepDenominator = SimpleVID->StepDenominator; 3558 int64_t Addend = SimpleVID->Addend; 3559 3560 assert(StepNumerator != 0 && "Invalid step"); 3561 bool Negate = false; 3562 int64_t SplatStepVal = StepNumerator; 3563 unsigned StepOpcode = ISD::MUL; 3564 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it 3565 // anyway as the shift of 63 won't fit in uimm5. 3566 if (StepNumerator != 1 && StepNumerator != INT64_MIN && 3567 isPowerOf2_64(std::abs(StepNumerator))) { 3568 Negate = StepNumerator < 0; 3569 StepOpcode = ISD::SHL; 3570 SplatStepVal = Log2_64(std::abs(StepNumerator)); 3571 } 3572 3573 // Only emit VIDs with suitably-small steps/addends. We use imm5 is a 3574 // threshold since it's the immediate value many RVV instructions accept. 3575 // There is no vmul.vi instruction so ensure multiply constant can fit in 3576 // a single addi instruction. 
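    // For example, <0, 2, 4, 6> is emitted as (vid.v << 1), and <7, 6, 5, 4>
    // is emitted as (7 - vid.v) via the Negate path with a step magnitude of 1.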
3577 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || 3578 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && 3579 isPowerOf2_32(StepDenominator) && 3580 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { 3581 MVT VIDVT = 3582 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; 3583 MVT VIDContainerVT = 3584 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget); 3585 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL); 3586 // Convert right out of the scalable type so we can use standard ISD 3587 // nodes for the rest of the computation. If we used scalable types with 3588 // these, we'd lose the fixed-length vector info and generate worse 3589 // vsetvli code. 3590 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); 3591 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || 3592 (StepOpcode == ISD::SHL && SplatStepVal != 0)) { 3593 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT); 3594 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); 3595 } 3596 if (StepDenominator != 1) { 3597 SDValue SplatStep = 3598 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT); 3599 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); 3600 } 3601 if (Addend != 0 || Negate) { 3602 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT); 3603 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, 3604 VID); 3605 } 3606 if (VT.isFloatingPoint()) { 3607 // TODO: Use vfwcvt to reduce register pressure. 3608 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID); 3609 } 3610 return VID; 3611 } 3612 } 3613 3614 // For very small build_vectors, use a single scalar insert of a constant. 3615 // TODO: Base this on constant rematerialization cost, not size. 3616 const unsigned EltBitSize = VT.getScalarSizeInBits(); 3617 if (VT.getSizeInBits() <= 32 && 3618 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { 3619 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits()); 3620 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && 3621 "Unexpected sequence type"); 3622 // If we can use the original VL with the modified element type, this 3623 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3624 // be moved into InsertVSETVLI? 3625 unsigned ViaVecLen = 3626 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1; 3627 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3628 3629 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3630 uint64_t SplatValue = 0; 3631 // Construct the amalgamated value at this larger vector type. 3632 for (const auto &OpIdx : enumerate(Op->op_values())) { 3633 const auto &SeqV = OpIdx.value(); 3634 if (!SeqV.isUndef()) 3635 SplatValue |= 3636 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); 3637 } 3638 3639 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3640 // achieve better constant materializion. 
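    // For example, a constant v4i8 <1, 2, 3, 4> amalgamates (above) into the
    // i32 value 0x04030201, which is inserted into element 0 of an i32 vector
    // below and then bitcast back to v4i8.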
3641 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3642 SplatValue = SignExtend64<32>(SplatValue); 3643 3644 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT, 3645 DAG.getUNDEF(ViaVecVT), 3646 DAG.getConstant(SplatValue, DL, XLenVT), 3647 DAG.getConstant(0, DL, XLenVT)); 3648 if (ViaVecLen != 1) 3649 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3650 MVT::getVectorVT(ViaIntVT, 1), Vec, 3651 DAG.getConstant(0, DL, XLenVT)); 3652 return DAG.getBitcast(VT, Vec); 3653 } 3654 3655 3656 // Attempt to detect "hidden" splats, which only reveal themselves as splats 3657 // when re-interpreted as a vector with a larger element type. For example, 3658 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 3659 // could be instead splat as 3660 // v2i32 = build_vector i32 0x00010000, i32 0x00010000 3661 // TODO: This optimization could also work on non-constant splats, but it 3662 // would require bit-manipulation instructions to construct the splat value. 3663 SmallVector<SDValue> Sequence; 3664 const auto *BV = cast<BuildVectorSDNode>(Op); 3665 if (VT.isInteger() && EltBitSize < Subtarget.getELen() && 3666 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && 3667 BV->getRepeatedSequence(Sequence) && 3668 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { 3669 unsigned SeqLen = Sequence.size(); 3670 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); 3671 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || 3672 ViaIntVT == MVT::i64) && 3673 "Unexpected sequence type"); 3674 3675 // If we can use the original VL with the modified element type, this 3676 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3677 // be moved into InsertVSETVLI? 3678 const unsigned RequiredVL = NumElts / SeqLen; 3679 const unsigned ViaVecLen = 3680 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? 3681 NumElts : RequiredVL; 3682 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); 3683 3684 unsigned EltIdx = 0; 3685 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 3686 uint64_t SplatValue = 0; 3687 // Construct the amalgamated value which can be splatted as this larger 3688 // vector type. 3689 for (const auto &SeqV : Sequence) { 3690 if (!SeqV.isUndef()) 3691 SplatValue |= 3692 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); 3693 EltIdx++; 3694 } 3695 3696 // On RV64, sign-extend from 32 to 64 bits where possible in order to 3697 // achieve better constant materializion. 3698 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 3699 SplatValue = SignExtend64<32>(SplatValue); 3700 3701 // Since we can't introduce illegal i64 types at this stage, we can only 3702 // perform an i64 splat on RV32 if it is its own sign-extended value. That 3703 // way we can use RVV instructions to splat. 
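    // For example, with a 64-bit ELEN on RV32, a v4i32 constant <5, 0, 5, 0>
    // repeats as the i64 value 5, which is its own 32-bit sign extension, so
    // it can be splatted with vmv.v.x at SEW=64; <0, 5, 0, 5> repeats as
    // 0x500000000 and is not handled here.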
3704 assert((ViaIntVT.bitsLE(XLenVT) || 3705 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && 3706 "Unexpected bitcast sequence"); 3707 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { 3708 SDValue ViaVL = 3709 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); 3710 MVT ViaContainerVT = 3711 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); 3712 SDValue Splat = 3713 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, 3714 DAG.getUNDEF(ViaContainerVT), 3715 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); 3716 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); 3717 if (ViaVecLen != RequiredVL) 3718 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 3719 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 3720 DAG.getConstant(0, DL, XLenVT)); 3721 return DAG.getBitcast(VT, Splat); 3722 } 3723 } 3724 3725 // If the number of signbits allows, see if we can lower as a <N x i8>. 3726 // Our main goal here is to reduce LMUL (and thus work) required to 3727 // build the constant, but we will also narrow if the resulting 3728 // narrow vector is known to materialize cheaply. 3729 // TODO: We really should be costing the smaller vector. There are 3730 // profitable cases this misses. 3731 if (EltBitSize > 8 && VT.isInteger() && 3732 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { 3733 unsigned SignBits = DAG.ComputeNumSignBits(Op); 3734 if (EltBitSize - SignBits < 8) { 3735 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), 3736 DL, Op->ops()); 3737 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), 3738 Source, DAG, Subtarget); 3739 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); 3740 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3741 } 3742 } 3743 3744 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 3745 return Res; 3746 3747 // For constant vectors, use generic constant pool lowering. Otherwise, 3748 // we'd have to materialize constants in GPRs just to move them into the 3749 // vector. 3750 return SDValue(); 3751} 3752 3753static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3754 const RISCVSubtarget &Subtarget) { 3755 MVT VT = Op.getSimpleValueType(); 3756 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 3757 3758 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 3759 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 3760 return lowerBuildVectorOfConstants(Op, DAG, Subtarget); 3761 3762 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3763 3764 SDLoc DL(Op); 3765 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3766 3767 MVT XLenVT = Subtarget.getXLenVT(); 3768 3769 if (VT.getVectorElementType() == MVT::i1) { 3770 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask 3771 // vector type, we have a legal equivalently-sized i8 type, so we can use 3772 // that. 3773 MVT WideVecVT = VT.changeVectorElementType(MVT::i8); 3774 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); 3775 3776 SDValue WideVec; 3777 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3778 // For a splat, perform a scalar truncate before creating the wider 3779 // vector. 
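      // For example, splatting a scalar %c into v8i1 becomes: mask %c down to
      // its low bit, splat that as a v8i8, and compare the result against zero
      // with setne (below) to produce the mask.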
3780 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat, 3781 DAG.getConstant(1, DL, Splat.getValueType())); 3782 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); 3783 } else { 3784 SmallVector<SDValue, 8> Ops(Op->op_values()); 3785 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); 3786 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); 3787 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); 3788 } 3789 3790 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); 3791 } 3792 3793 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 3794 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) 3795 return Gather; 3796 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 3797 : RISCVISD::VMV_V_X_VL; 3798 if (!VT.isFloatingPoint()) 3799 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); 3800 Splat = 3801 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); 3802 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3803 } 3804 3805 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) 3806 return Res; 3807 3808 // If we're compiling for an exact VLEN value, we can split our work per 3809 // register in the register group. 3810 const unsigned MinVLen = Subtarget.getRealMinVLen(); 3811 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 3812 if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) { 3813 MVT ElemVT = VT.getVectorElementType(); 3814 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 3815 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3816 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 3817 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 3818 assert(M1VT == getLMUL1VT(M1VT)); 3819 3820 // The following semantically builds up a fixed length concat_vector 3821 // of the component build_vectors. We eagerly lower to scalable and 3822 // insert_subvector here to avoid DAG combining it back to a large 3823 // build_vector. 3824 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); 3825 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 3826 SDValue Vec = DAG.getUNDEF(ContainerVT); 3827 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { 3828 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg); 3829 SDValue SubBV = 3830 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps); 3831 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget); 3832 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; 3833 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV, 3834 DAG.getVectorIdxConstant(InsertIdx, DL)); 3835 } 3836 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3837 } 3838 3839 // Cap the cost at a value linear to the number of elements in the vector. 3840 // The default lowering is to use the stack. The vector store + scalar loads 3841 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up 3842 // being (at least) linear in LMUL. As a result, using the vslidedown 3843 // lowering for every element ends up being VL*LMUL.. 3844 // TODO: Should we be directly costing the stack alternative? Doing so might 3845 // give us a more accurate upper bound. 3846 InstructionCost LinearBudget = VT.getVectorNumElements() * 2; 3847 3848 // TODO: unify with TTI getSlideCost. 
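  // For example, a 16-element build_vector has a budget of 32; at LMUL_2 each
  // slide costs 2, so 16 defined elements exactly fit, while at LMUL_8 (cost 8
  // per element) the budget is exceeded and we fall back to the stack lowering.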
3849 InstructionCost PerSlideCost = 1; 3850 switch (RISCVTargetLowering::getLMUL(ContainerVT)) { 3851 default: break; 3852 case RISCVII::VLMUL::LMUL_2: 3853 PerSlideCost = 2; 3854 break; 3855 case RISCVII::VLMUL::LMUL_4: 3856 PerSlideCost = 4; 3857 break; 3858 case RISCVII::VLMUL::LMUL_8: 3859 PerSlideCost = 8; 3860 break; 3861 } 3862 3863 // TODO: Should we be using the build instseq then cost + evaluate scheme 3864 // we use for integer constants here? 3865 unsigned UndefCount = 0; 3866 for (const SDValue &V : Op->ops()) { 3867 if (V.isUndef()) { 3868 UndefCount++; 3869 continue; 3870 } 3871 if (UndefCount) { 3872 LinearBudget -= PerSlideCost; 3873 UndefCount = 0; 3874 } 3875 LinearBudget -= PerSlideCost; 3876 } 3877 if (UndefCount) { 3878 LinearBudget -= PerSlideCost; 3879 } 3880 3881 if (LinearBudget < 0) 3882 return SDValue(); 3883 3884 assert((!VT.isFloatingPoint() || 3885 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && 3886 "Illegal type which will result in reserved encoding"); 3887 3888 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3889 3890 SDValue Vec; 3891 UndefCount = 0; 3892 for (SDValue V : Op->ops()) { 3893 if (V.isUndef()) { 3894 UndefCount++; 3895 continue; 3896 } 3897 3898 // Start our sequence with a TA splat in the hopes that hardware is able to 3899 // recognize there's no dependency on the prior value of our temporary 3900 // register. 3901 if (!Vec) { 3902 Vec = DAG.getSplatVector(VT, DL, V); 3903 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3904 UndefCount = 0; 3905 continue; 3906 } 3907 3908 if (UndefCount) { 3909 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3910 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3911 Vec, Offset, Mask, VL, Policy); 3912 UndefCount = 0; 3913 } 3914 auto OpCode = 3915 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; 3916 if (!VT.isFloatingPoint()) 3917 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); 3918 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, 3919 V, Mask, VL); 3920 } 3921 if (UndefCount) { 3922 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); 3923 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3924 Vec, Offset, Mask, VL, Policy); 3925 } 3926 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 3927} 3928 3929static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3930 SDValue Lo, SDValue Hi, SDValue VL, 3931 SelectionDAG &DAG) { 3932 if (!Passthru) 3933 Passthru = DAG.getUNDEF(VT); 3934 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 3935 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 3936 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 3937 // If Hi constant is all the same sign bit as Lo, lower this as a custom 3938 // node in order to try and match RVV vector/scalar instructions. 3939 if ((LoC >> 31) == HiC) 3940 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3941 3942 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, 3943 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use 3944 // vlmax vsetvli or vsetivli to change the VL. 3945 // FIXME: Support larger constants? 3946 // FIXME: Support non-constant VLs by saturating? 
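    // For example, splatting the i64 constant 0x0000000100000001 (Lo == Hi ==
    // 1) can be done as a vmv.v.x of 1 at EEW=32, doubling the effective
    // element count, followed by a bitcast back to the 64-bit element type.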
3947 if (LoC == HiC) { 3948 SDValue NewVL; 3949 if (isAllOnesConstant(VL) || 3950 (isa<RegisterSDNode>(VL) && 3951 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) 3952 NewVL = DAG.getRegister(RISCV::X0, MVT::i32); 3953 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal())) 3954 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); 3955 3956 if (NewVL) { 3957 MVT InterVT = 3958 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 3959 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, 3960 DAG.getUNDEF(InterVT), Lo, 3961 DAG.getRegister(RISCV::X0, MVT::i32)); 3962 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); 3963 } 3964 } 3965 } 3966 3967 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 3968 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 3969 isa<ConstantSDNode>(Hi.getOperand(1)) && 3970 Hi.getConstantOperandVal(1) == 31) 3971 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3972 3973 // If the hi bits of the splat are undefined, then it's fine to just splat Lo 3974 // even if it might be sign extended. 3975 if (Hi.isUndef()) 3976 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); 3977 3978 // Fall back to a stack store and stride x0 vector load. 3979 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, 3980 Hi, VL); 3981} 3982 3983// Called by type legalization to handle splat of i64 on RV32. 3984// FIXME: We can optimize this when the type has sign or zero bits in one 3985// of the halves. 3986static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, 3987 SDValue Scalar, SDValue VL, 3988 SelectionDAG &DAG) { 3989 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); 3990 SDValue Lo, Hi; 3991 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); 3992 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); 3993} 3994 3995// This function lowers a splat of a scalar operand Splat with the vector 3996// length VL. It ensures the final sequence is type legal, which is useful when 3997// lowering a splat after type legalization. 3998static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, 3999 MVT VT, const SDLoc &DL, SelectionDAG &DAG, 4000 const RISCVSubtarget &Subtarget) { 4001 bool HasPassthru = Passthru && !Passthru.isUndef(); 4002 if (!HasPassthru && !Passthru) 4003 Passthru = DAG.getUNDEF(VT); 4004 if (VT.isFloatingPoint()) 4005 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); 4006 4007 MVT XLenVT = Subtarget.getXLenVT(); 4008 4009 // Simplest case is that the operand needs to be promoted to XLenVT. 4010 if (Scalar.getValueType().bitsLE(XLenVT)) { 4011 // If the operand is a constant, sign extend to increase our chances 4012 // of being able to use a .vi instruction. ANY_EXTEND would become a 4013 // a zero extend and the simm5 check in isel would fail. 4014 // FIXME: Should we ignore the upper bits in isel instead? 4015 unsigned ExtOpc = 4016 isa<ConstantSDNode>(Scalar) ? 
ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4017 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4018 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 4019 } 4020 4021 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && 4022 "Unexpected scalar for splat lowering!"); 4023 4024 if (isOneConstant(VL) && isNullConstant(Scalar)) 4025 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, 4026 DAG.getConstant(0, DL, XLenVT), VL); 4027 4028 // Otherwise use the more complicated splatting algorithm. 4029 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); 4030} 4031 4032// This function lowers an insert of a scalar operand Scalar into lane 4033// 0 of the vector regardless of the value of VL. The contents of the 4034// remaining lanes of the result vector are unspecified. VL is assumed 4035// to be non-zero. 4036static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, 4037 const SDLoc &DL, SelectionDAG &DAG, 4038 const RISCVSubtarget &Subtarget) { 4039 assert(VT.isScalableVector() && "Expect VT is scalable vector type."); 4040 4041 const MVT XLenVT = Subtarget.getXLenVT(); 4042 SDValue Passthru = DAG.getUNDEF(VT); 4043 4044 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4045 isNullConstant(Scalar.getOperand(1))) { 4046 SDValue ExtractedVal = Scalar.getOperand(0); 4047 // The element types must be the same. 4048 if (ExtractedVal.getValueType().getVectorElementType() == 4049 VT.getVectorElementType()) { 4050 MVT ExtractedVT = ExtractedVal.getSimpleValueType(); 4051 MVT ExtractedContainerVT = ExtractedVT; 4052 if (ExtractedContainerVT.isFixedLengthVector()) { 4053 ExtractedContainerVT = getContainerForFixedLengthVector( 4054 DAG, ExtractedContainerVT, Subtarget); 4055 ExtractedVal = convertToScalableVector(ExtractedContainerVT, 4056 ExtractedVal, DAG, Subtarget); 4057 } 4058 if (ExtractedContainerVT.bitsLE(VT)) 4059 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, 4060 ExtractedVal, DAG.getConstant(0, DL, XLenVT)); 4061 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, 4062 DAG.getConstant(0, DL, XLenVT)); 4063 } 4064 } 4065 4066 4067 if (VT.isFloatingPoint()) 4068 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, 4069 DAG.getUNDEF(VT), Scalar, VL); 4070 4071 // Avoid the tricky legalization cases by falling back to using the 4072 // splat code which already handles it gracefully. 4073 if (!Scalar.getValueType().bitsLE(XLenVT)) 4074 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar, 4075 DAG.getConstant(1, DL, XLenVT), 4076 VT, DL, DAG, Subtarget); 4077 4078 // If the operand is a constant, sign extend to increase our chances 4079 // of being able to use a .vi instruction. ANY_EXTEND would become a 4080 // a zero extend and the simm5 check in isel would fail. 4081 // FIXME: Should we ignore the upper bits in isel instead? 4082 unsigned ExtOpc = 4083 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4084 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 4085 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, 4086 DAG.getUNDEF(VT), Scalar, VL); 4087} 4088 4089// Is this a shuffle extracts either the even or odd elements of a vector? 4090// That is, specifically, either (a) or (b) below. 
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both inputs must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}

/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements? \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the
/// first odd element. These can be the first element in a source or the
/// element half way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                int &OddSrc, const RISCVSubtarget &Subtarget) {
  // We need to be able to widen elements to the next larger integer type.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  int Size = Mask.size();
  int NumElts = VT.getVectorNumElements();
  assert(Size == (int)NumElts && "Unexpected mask size");

  SmallVector<unsigned, 2> StartIndexes;
  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
    return false;

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  // One source should be low half of first vector.
  if (EvenSrc != 0 && OddSrc != 0)
    return false;

  // Subvectors will be extracted from either the start of the two input
  // vectors, or the start and middle of the first vector if it's a unary
  // interleave.
  // In both cases, HalfNumElts will be extracted.
  // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
  // we'll create an illegal extract_subvector.
  // FIXME: We could support other values using a slidedown first.
  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}

/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
4175/// This is equivalent to vector.splice or X86's PALIGNR instruction. The 4176/// returned rotation amount is for a rotate right, where elements move from 4177/// higher elements to lower elements. \p LoSrc indicates the first source 4178/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 4179/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 4180/// 0 or 1 if a rotation is found. 4181/// 4182/// NOTE: We talk about rotate to the right which matches how bit shift and 4183/// rotate instructions are described where LSBs are on the right, but LLVM IR 4184/// and the table below write vectors with the lowest elements on the left. 4185static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { 4186 int Size = Mask.size(); 4187 4188 // We need to detect various ways of spelling a rotation: 4189 // [11, 12, 13, 14, 15, 0, 1, 2] 4190 // [-1, 12, 13, 14, -1, -1, 1, -1] 4191 // [-1, -1, -1, -1, -1, -1, 1, 2] 4192 // [ 3, 4, 5, 6, 7, 8, 9, 10] 4193 // [-1, 4, 5, 6, -1, -1, 9, -1] 4194 // [-1, 4, 5, 6, -1, -1, -1, -1] 4195 int Rotation = 0; 4196 LoSrc = -1; 4197 HiSrc = -1; 4198 for (int i = 0; i != Size; ++i) { 4199 int M = Mask[i]; 4200 if (M < 0) 4201 continue; 4202 4203 // Determine where a rotate vector would have started. 4204 int StartIdx = i - (M % Size); 4205 // The identity rotation isn't interesting, stop. 4206 if (StartIdx == 0) 4207 return -1; 4208 4209 // If we found the tail of a vector the rotation must be the missing 4210 // front. If we found the head of a vector, it must be how much of the 4211 // head. 4212 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; 4213 4214 if (Rotation == 0) 4215 Rotation = CandidateRotation; 4216 else if (Rotation != CandidateRotation) 4217 // The rotations don't match, so we can't match this mask. 4218 return -1; 4219 4220 // Compute which value this mask is pointing at. 4221 int MaskSrc = M < Size ? 0 : 1; 4222 4223 // Compute which of the two target values this index should be assigned to. 4224 // This reflects whether the high elements are remaining or the low elemnts 4225 // are remaining. 4226 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; 4227 4228 // Either set up this value if we've not encountered it before, or check 4229 // that it remains consistent. 4230 if (TargetSrc < 0) 4231 TargetSrc = MaskSrc; 4232 else if (TargetSrc != MaskSrc) 4233 // This may be a rotation, but it pulls from the inputs in some 4234 // unsupported interleaving. 4235 return -1; 4236 } 4237 4238 // Check that we successfully analyzed the mask, and normalize the results. 4239 assert(Rotation != 0 && "Failed to locate a viable rotation!"); 4240 assert((LoSrc >= 0 || HiSrc >= 0) && 4241 "Failed to find a rotated input vector!"); 4242 4243 return Rotation; 4244} 4245 4246// Lower a deinterleave shuffle to vnsrl. 
4247// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) 4248// -> [p, q, r, s] (EvenElts == false) 4249// VT is the type of the vector to return, <[vscale x ]n x ty> 4250// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> 4251static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, 4252 bool EvenElts, 4253 const RISCVSubtarget &Subtarget, 4254 SelectionDAG &DAG) { 4255 // The result is a vector of type <m x n x ty> 4256 MVT ContainerVT = VT; 4257 // Convert fixed vectors to scalable if needed 4258 if (ContainerVT.isFixedLengthVector()) { 4259 assert(Src.getSimpleValueType().isFixedLengthVector()); 4260 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 4261 4262 // The source is a vector of type <m x n*2 x ty> 4263 MVT SrcContainerVT = 4264 MVT::getVectorVT(ContainerVT.getVectorElementType(), 4265 ContainerVT.getVectorElementCount() * 2); 4266 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 4267 } 4268 4269 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4270 4271 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> 4272 // This also converts FP to int. 4273 unsigned EltBits = ContainerVT.getScalarSizeInBits(); 4274 MVT WideSrcContainerVT = MVT::getVectorVT( 4275 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount()); 4276 Src = DAG.getBitcast(WideSrcContainerVT, Src); 4277 4278 // The integer version of the container type. 4279 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); 4280 4281 // If we want even elements, then the shift amount is 0. Otherwise, shift by 4282 // the original element size. 4283 unsigned Shift = EvenElts ? 0 : EltBits; 4284 SDValue SplatShift = DAG.getNode( 4285 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), 4286 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); 4287 SDValue Res = 4288 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, 4289 DAG.getUNDEF(IntContainerVT), TrueMask, VL); 4290 // Cast back to FP if needed. 4291 Res = DAG.getBitcast(ContainerVT, Res); 4292 4293 if (VT.isFixedLengthVector()) 4294 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 4295 return Res; 4296} 4297 4298// Lower the following shuffle to vslidedown. 4299// a) 4300// t49: v8i8 = extract_subvector t13, Constant:i64<0> 4301// t109: v8i8 = extract_subvector t13, Constant:i64<8> 4302// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 4303// b) 4304// t69: v16i16 = extract_subvector t68, Constant:i64<0> 4305// t23: v8i16 = extract_subvector t69, Constant:i64<0> 4306// t29: v4i16 = extract_subvector t23, Constant:i64<4> 4307// t26: v8i16 = extract_subvector t69, Constant:i64<8> 4308// t30: v4i16 = extract_subvector t26, Constant:i64<0> 4309// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 4310static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, 4311 SDValue V1, SDValue V2, 4312 ArrayRef<int> Mask, 4313 const RISCVSubtarget &Subtarget, 4314 SelectionDAG &DAG) { 4315 auto findNonEXTRACT_SUBVECTORParent = 4316 [](SDValue Parent) -> std::pair<SDValue, uint64_t> { 4317 uint64_t Offset = 0; 4318 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && 4319 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 4320 // a scalable vector. But we don't want to match the case. 
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask is also continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}

// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e.
inserting right 4409 // up to the very end of it, then we don't actually care about the tail. 4410 if (NumSubElts + Index >= (int)NumElts) 4411 Policy |= RISCVII::TAIL_AGNOSTIC; 4412 4413 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget); 4414 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget); 4415 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT); 4416 4417 SDValue Res; 4418 // If we're inserting into the lowest elements, use a tail undisturbed 4419 // vmv.v.v. 4420 if (Index == 0) 4421 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert, 4422 VL); 4423 else 4424 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert, 4425 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy); 4426 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4427} 4428 4429/// Match v(f)slide1up/down idioms. These operations involve sliding 4430/// N-1 elements to make room for an inserted scalar at one end. 4431static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, 4432 SDValue V1, SDValue V2, 4433 ArrayRef<int> Mask, 4434 const RISCVSubtarget &Subtarget, 4435 SelectionDAG &DAG) { 4436 bool OpsSwapped = false; 4437 if (!isa<BuildVectorSDNode>(V1)) { 4438 if (!isa<BuildVectorSDNode>(V2)) 4439 return SDValue(); 4440 std::swap(V1, V2); 4441 OpsSwapped = true; 4442 } 4443 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue(); 4444 if (!Splat) 4445 return SDValue(); 4446 4447 // Return true if the mask could describe a slide of Mask.size() - 1 4448 // elements from concat_vector(V1, V2)[Base:] to [Offset:]. 4449 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { 4450 const unsigned S = (Offset > 0) ? 0 : -Offset; 4451 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); 4452 for (unsigned i = S; i != E; ++i) 4453 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) 4454 return false; 4455 return true; 4456 }; 4457 4458 const unsigned NumElts = VT.getVectorNumElements(); 4459 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); 4460 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) 4461 return SDValue(); 4462 4463 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; 4464 // Inserted lane must come from splat, undef scalar is legal but not profitable. 4465 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) 4466 return SDValue(); 4467 4468 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4469 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4470 auto OpCode = IsVSlidedown ? 4471 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : 4472 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); 4473 if (!VT.isFloatingPoint()) 4474 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat); 4475 auto Vec = DAG.getNode(OpCode, DL, ContainerVT, 4476 DAG.getUNDEF(ContainerVT), 4477 convertToScalableVector(ContainerVT, V2, DAG, Subtarget), 4478 Splat, TrueMask, VL); 4479 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4480} 4481 4482// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 4483// to create an interleaved vector of <[vscale x] n*2 x ty>. 4484// This requires that the size of ty is less than the subtarget's maximum ELEN. 
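// For example, interleaving EvenV = <a, b> with OddV = <p, q> produces
// <a, p, b, q>; the result is computed below as (OddV << SEW) + EvenV on
// elements of twice the SEW.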
4485static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, 4486 const SDLoc &DL, SelectionDAG &DAG, 4487 const RISCVSubtarget &Subtarget) { 4488 MVT VecVT = EvenV.getSimpleValueType(); 4489 MVT VecContainerVT = VecVT; // <vscale x n x ty> 4490 // Convert fixed vectors to scalable if needed 4491 if (VecContainerVT.isFixedLengthVector()) { 4492 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget); 4493 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget); 4494 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget); 4495 } 4496 4497 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); 4498 4499 // We're working with a vector of the same size as the resulting 4500 // interleaved vector, but with half the number of elements and 4501 // twice the SEW (Hence the restriction on not using the maximum 4502 // ELEN) 4503 MVT WideVT = 4504 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2), 4505 VecVT.getVectorElementCount()); 4506 MVT WideContainerVT = WideVT; // <vscale x n x ty*2> 4507 if (WideContainerVT.isFixedLengthVector()) 4508 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget); 4509 4510 // Bitcast the input vectors to integers in case they are FP 4511 VecContainerVT = VecContainerVT.changeTypeToInteger(); 4512 EvenV = DAG.getBitcast(VecContainerVT, EvenV); 4513 OddV = DAG.getBitcast(VecContainerVT, OddV); 4514 4515 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget); 4516 SDValue Passthru = DAG.getUNDEF(WideContainerVT); 4517 4518 SDValue Interleaved; 4519 if (Subtarget.hasStdExtZvbb()) { 4520 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. 4521 SDValue OffsetVec = 4522 DAG.getSplatVector(VecContainerVT, DL, 4523 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, 4524 Subtarget.getXLenVT())); 4525 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV, 4526 OffsetVec, Passthru, Mask, VL); 4527 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, 4528 Interleaved, EvenV, Passthru, Mask, VL); 4529 } else { 4530 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with 4531 // vwaddu.vv 4532 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV, 4533 OddV, Passthru, Mask, VL); 4534 4535 // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV * 0xff...ff 4536 SDValue AllOnesVec = DAG.getSplatVector( 4537 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT())); 4538 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, 4539 OddV, AllOnesVec, Passthru, Mask, VL); 4540 4541 // Add the two together so we get 4542 // (OddV * 0xff...ff) + (OddV + EvenV) 4543 // = (OddV * 0x100...00) + EvenV 4544 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV 4545 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx 4546 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, 4547 Interleaved, OddsMul, Passthru, Mask, VL); 4548 } 4549 4550 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty> 4551 MVT ResultContainerVT = MVT::getVectorVT( 4552 VecVT.getVectorElementType(), // Make sure to use original type 4553 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2)); 4554 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved); 4555 4556 // Convert back to a fixed vector if needed 4557 MVT ResultVT = 4558 MVT::getVectorVT(VecVT.getVectorElementType(), 4559 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 4560 if
(ResultVT.isFixedLengthVector()) 4561 Interleaved = 4562 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget); 4563 4564 return Interleaved; 4565} 4566 4567// If we have a vector of bits that we want to reverse, we can use a vbrev on a 4568// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. 4569static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, 4570 SelectionDAG &DAG, 4571 const RISCVSubtarget &Subtarget) { 4572 SDLoc DL(SVN); 4573 MVT VT = SVN->getSimpleValueType(0); 4574 SDValue V = SVN->getOperand(0); 4575 unsigned NumElts = VT.getVectorNumElements(); 4576 4577 assert(VT.getVectorElementType() == MVT::i1); 4578 4579 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(), 4580 SVN->getMask().size()) || 4581 !SVN->getOperand(1).isUndef()) 4582 return SDValue(); 4583 4584 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts)); 4585 EVT ViaVT = EVT::getVectorVT( 4586 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1); 4587 EVT ViaBitVT = 4588 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits()); 4589 4590 // If we don't have zvbb or the larger element type > ELEN, the operation will 4591 // be illegal. 4592 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE, 4593 ViaVT) || 4594 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT)) 4595 return SDValue(); 4596 4597 // If the bit vector doesn't fit exactly into the larger element type, we need 4598 // to insert it into the larger vector and then shift up the reversed bits 4599 // afterwards to get rid of the gap introduced. 4600 if (ViaEltSize > NumElts) 4601 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT), 4602 V, DAG.getVectorIdxConstant(0, DL)); 4603 4604 SDValue Res = 4605 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V)); 4606 4607 // Shift up the reversed bits if the vector didn't exactly fit into the larger 4608 // element type. 4609 if (ViaEltSize > NumElts) 4610 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res, 4611 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT)); 4612 4613 Res = DAG.getBitcast(ViaBitVT, Res); 4614 4615 if (ViaEltSize > NumElts) 4616 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, 4617 DAG.getVectorIdxConstant(0, DL)); 4618 return Res; 4619} 4620 4621// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can 4622// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this 4623// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. 4624static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, 4625 SelectionDAG &DAG, 4626 const RISCVSubtarget &Subtarget) { 4627 SDLoc DL(SVN); 4628 4629 EVT VT = SVN->getValueType(0); 4630 unsigned NumElts = VT.getVectorNumElements(); 4631 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 4632 unsigned NumSubElts, RotateAmt; 4633 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, 4634 NumElts, NumSubElts, RotateAmt)) 4635 return SDValue(); 4636 MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts), 4637 NumElts / NumSubElts); 4638 4639 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. 4640 if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT)) 4641 return SDValue(); 4642 4643 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0)); 4644 4645 SDValue Rotate; 4646 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, 4647 // so canonicalize to vrev8. 
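// For example, the i16 value 0xAABB rotated by 8 in either direction is
// 0xBBAA, which is exactly bswap(0xAABB).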
4648 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) 4649 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op); 4650 else 4651 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op, 4652 DAG.getConstant(RotateAmt, DL, RotateVT)); 4653 4654 return DAG.getBitcast(VT, Rotate); 4655} 4656 4657// If compiling with an exactly known VLEN, see if we can split a 4658// shuffle on m2 or larger into a small number of m1 sized shuffles 4659// which write each destination registers exactly once. 4660static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, 4661 SelectionDAG &DAG, 4662 const RISCVSubtarget &Subtarget) { 4663 SDLoc DL(SVN); 4664 MVT VT = SVN->getSimpleValueType(0); 4665 SDValue V1 = SVN->getOperand(0); 4666 SDValue V2 = SVN->getOperand(1); 4667 ArrayRef<int> Mask = SVN->getMask(); 4668 unsigned NumElts = VT.getVectorNumElements(); 4669 4670 // If we don't know exact data layout, not much we can do. If this 4671 // is already m1 or smaller, no point in splitting further. 4672 const unsigned MinVLen = Subtarget.getRealMinVLen(); 4673 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 4674 if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen) 4675 return SDValue(); 4676 4677 MVT ElemVT = VT.getVectorElementType(); 4678 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 4679 unsigned VRegsPerSrc = NumElts / ElemsPerVReg; 4680 4681 SmallVector<std::pair<int, SmallVector<int>>> 4682 OutMasks(VRegsPerSrc, {-1, {}}); 4683 4684 // Check if our mask can be done as a 1-to-1 mapping from source 4685 // to destination registers in the group without needing to 4686 // write each destination more than once. 4687 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { 4688 int DstVecIdx = DstIdx / ElemsPerVReg; 4689 int DstSubIdx = DstIdx % ElemsPerVReg; 4690 int SrcIdx = Mask[DstIdx]; 4691 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) 4692 continue; 4693 int SrcVecIdx = SrcIdx / ElemsPerVReg; 4694 int SrcSubIdx = SrcIdx % ElemsPerVReg; 4695 if (OutMasks[DstVecIdx].first == -1) 4696 OutMasks[DstVecIdx].first = SrcVecIdx; 4697 if (OutMasks[DstVecIdx].first != SrcVecIdx) 4698 // Note: This case could easily be handled by keeping track of a chain 4699 // of source values and generating two element shuffles below. This is 4700 // less an implementation question, and more a profitability one. 4701 return SDValue(); 4702 4703 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1); 4704 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; 4705 } 4706 4707 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4708 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); 4709 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); 4710 assert(M1VT == getLMUL1VT(M1VT)); 4711 unsigned NumOpElts = M1VT.getVectorMinNumElements(); 4712 SDValue Vec = DAG.getUNDEF(ContainerVT); 4713 // The following semantically builds up a fixed length concat_vector 4714 // of the component shuffle_vectors. We eagerly lower to scalable here 4715 // to avoid DAG combining it back to a large shuffle_vector again. 4716 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4717 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 4718 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { 4719 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; 4720 if (SrcVecIdx == -1) 4721 continue; 4722 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; 4723 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? 
V2 : V1; 4724 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec, 4725 DAG.getVectorIdxConstant(ExtractIdx, DL)); 4726 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget); 4727 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask); 4728 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget); 4729 unsigned InsertIdx = DstVecIdx * NumOpElts; 4730 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec, 4731 DAG.getVectorIdxConstant(InsertIdx, DL)); 4732 } 4733 return convertFromScalableVector(VT, Vec, DAG, Subtarget); 4734} 4735 4736static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 4737 const RISCVSubtarget &Subtarget) { 4738 SDValue V1 = Op.getOperand(0); 4739 SDValue V2 = Op.getOperand(1); 4740 SDLoc DL(Op); 4741 MVT XLenVT = Subtarget.getXLenVT(); 4742 MVT VT = Op.getSimpleValueType(); 4743 unsigned NumElts = VT.getVectorNumElements(); 4744 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 4745 4746 if (VT.getVectorElementType() == MVT::i1) { 4747 // Lower to a vror.vi of a larger element type if possible before we promote 4748 // i1s to i8s. 4749 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 4750 return V; 4751 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) 4752 return V; 4753 4754 // Promote i1 shuffle to i8 shuffle. 4755 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); 4756 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); 4757 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) 4758 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2); 4759 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask()); 4760 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT), 4761 ISD::SETNE); 4762 } 4763 4764 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 4765 4766 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4767 4768 if (SVN->isSplat()) { 4769 const int Lane = SVN->getSplatIndex(); 4770 if (Lane >= 0) { 4771 MVT SVT = VT.getVectorElementType(); 4772 4773 // Turn splatted vector load into a strided load with an X0 stride. 4774 SDValue V = V1; 4775 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 4776 // with undef. 4777 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 4778 int Offset = Lane; 4779 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 4780 int OpElements = 4781 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 4782 V = V.getOperand(Offset / OpElements); 4783 Offset %= OpElements; 4784 } 4785 4786 // We need to ensure the load isn't atomic or volatile. 4787 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 4788 auto *Ld = cast<LoadSDNode>(V); 4789 Offset *= SVT.getStoreSize(); 4790 SDValue NewAddr = DAG.getMemBasePlusOffset( 4791 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL); 4792 4793 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 
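// (A stride of x0, i.e. zero, makes every element load from the same
// address, so the strided load produces a splat of the 64-bit memory value.)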
4794 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 4795 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 4796 SDValue IntID = 4797 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 4798 SDValue Ops[] = {Ld->getChain(), 4799 IntID, 4800 DAG.getUNDEF(ContainerVT), 4801 NewAddr, 4802 DAG.getRegister(RISCV::X0, XLenVT), 4803 VL}; 4804 SDValue NewLoad = DAG.getMemIntrinsicNode( 4805 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 4806 DAG.getMachineFunction().getMachineMemOperand( 4807 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 4808 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 4809 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 4810 } 4811 4812 // Otherwise use a scalar load and splat. This will give the best 4813 // opportunity to fold a splat into the operation. ISel can turn it into 4814 // the x0 strided load if we aren't able to fold away the select. 4815 if (SVT.isFloatingPoint()) 4816 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 4817 Ld->getPointerInfo().getWithOffset(Offset), 4818 Ld->getOriginalAlign(), 4819 Ld->getMemOperand()->getFlags()); 4820 else 4821 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 4822 Ld->getPointerInfo().getWithOffset(Offset), SVT, 4823 Ld->getOriginalAlign(), 4824 Ld->getMemOperand()->getFlags()); 4825 DAG.makeEquivalentMemoryOrdering(Ld, V); 4826 4827 unsigned Opc = 4828 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 4829 SDValue Splat = 4830 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); 4831 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 4832 } 4833 4834 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 4835 assert(Lane < (int)NumElts && "Unexpected lane!"); 4836 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, 4837 V1, DAG.getConstant(Lane, DL, XLenVT), 4838 DAG.getUNDEF(ContainerVT), TrueMask, VL); 4839 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 4840 } 4841 } 4842 4843 // For exact VLEN m2 or greater, try to split to m1 operations if we 4844 // can split cleanly. 4845 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) 4846 return V; 4847 4848 ArrayRef<int> Mask = SVN->getMask(); 4849 4850 if (SDValue V = 4851 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4852 return V; 4853 4854 if (SDValue V = 4855 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4856 return V; 4857 4858 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if 4859 // available. 4860 if (Subtarget.hasStdExtZvkb()) 4861 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 4862 return V; 4863 4864 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 4865 // be undef which can be handled with a single SLIDEDOWN/UP. 4866 int LoSrc, HiSrc; 4867 int Rotation = isElementRotate(LoSrc, HiSrc, Mask); 4868 if (Rotation > 0) { 4869 SDValue LoV, HiV; 4870 if (LoSrc >= 0) { 4871 LoV = LoSrc == 0 ? V1 : V2; 4872 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); 4873 } 4874 if (HiSrc >= 0) { 4875 HiV = HiSrc == 0 ? V1 : V2; 4876 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); 4877 } 4878 4879 // We found a rotation. We need to slide HiV down by Rotation. Then we need 4880 // to slide LoV up by (NumElts - Rotation). 
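// For example, with NumElts = 8 and Rotation = 3, the slidedown fills
// destination elements [0,5) from HiV[3..7] and the slideup then fills
// elements [5,8) from LoV[0..2].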
4881 unsigned InvRotate = NumElts - Rotation; 4882 4883 SDValue Res = DAG.getUNDEF(ContainerVT); 4884 if (HiV) { 4885 // Even though we could use a smaller VL, don't to avoid a vsetivli 4886 // toggle. 4887 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, 4888 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL); 4889 } 4890 if (LoV) 4891 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, 4892 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, 4893 RISCVII::TAIL_AGNOSTIC); 4894 4895 return convertFromScalableVector(VT, Res, DAG, Subtarget); 4896 } 4897 4898 // If this is a deinterleave and we can widen the vector, then we can use 4899 // vnsrl to deinterleave. 4900 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { 4901 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, 4902 Subtarget, DAG); 4903 } 4904 4905 if (SDValue V = 4906 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) 4907 return V; 4908 4909 // Detect an interleave shuffle and lower to 4910 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 4911 int EvenSrc, OddSrc; 4912 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { 4913 // Extract the halves of the vectors. 4914 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 4915 4916 int Size = Mask.size(); 4917 SDValue EvenV, OddV; 4918 assert(EvenSrc >= 0 && "Undef source?"); 4919 EvenV = (EvenSrc / Size) == 0 ? V1 : V2; 4920 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV, 4921 DAG.getConstant(EvenSrc % Size, DL, XLenVT)); 4922 4923 assert(OddSrc >= 0 && "Undef source?"); 4924 OddV = (OddSrc / Size) == 0 ? V1 : V2; 4925 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV, 4926 DAG.getConstant(OddSrc % Size, DL, XLenVT)); 4927 4928 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); 4929 } 4930 4931 // Detect shuffles which can be re-expressed as vector selects; these are 4932 // shuffles in which each element in the destination is taken from an element 4933 // at the corresponding index in either source vectors. 4934 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { 4935 int MaskIndex = MaskIdx.value(); 4936 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; 4937 }); 4938 4939 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 4940 4941 // By default we preserve the original operand order, and use a mask to 4942 // select LHS as true and RHS as false. However, since RVV vector selects may 4943 // feature splats but only on the LHS, we may choose to invert our mask and 4944 // instead select between RHS and LHS. 4945 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 4946 4947 if (IsSelect) { 4948 // Now construct the mask that will be used by the vselect operation. 4949 SmallVector<SDValue> MaskVals; 4950 for (int MaskIndex : Mask) { 4951 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; 4952 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 4953 } 4954 4955 if (SwapOps) 4956 std::swap(V1, V2); 4957 4958 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 4959 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 4960 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 4961 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 4962 } 4963 4964 // We might be able to express the shuffle as a bitrotate. But even if we 4965 // don't have Zvkb and have to expand, the expanded sequence of approx. 
2 4966 // shifts and a vor will have a higher throughput than a vrgather. 4967 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) 4968 return V; 4969 4970 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { 4971 // On such a large vector we're unable to use i8 as the index type. 4972 // FIXME: We could promote the index to i16 and use vrgatherei16, but that 4973 // may involve vector splitting if we're already at LMUL=8, or our 4974 // user-supplied maximum fixed-length LMUL. 4975 return SDValue(); 4976 } 4977 4978 // As a backup, shuffles can be lowered via a vrgather instruction, possibly 4979 // merged with a second vrgather. 4980 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; 4981 4982 // Keep track of which non-undef indices are used by each LHS/RHS shuffle 4983 // half. 4984 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; 4985 4986 SmallVector<SDValue> MaskVals; 4987 4988 // Now construct the mask that will be used by the blended vrgather operation. 4989 // Construct the appropriate indices into each vector. 4990 for (int MaskIndex : Mask) { 4991 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; 4992 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 4993 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; 4994 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 4995 ? DAG.getConstant(MaskIndex, DL, XLenVT) 4996 : DAG.getUNDEF(XLenVT)); 4997 GatherIndicesRHS.push_back( 4998 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) 4999 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); 5000 if (IsLHSOrUndefIndex && MaskIndex >= 0) 5001 ++LHSIndexCounts[MaskIndex]; 5002 if (!IsLHSOrUndefIndex) 5003 ++RHSIndexCounts[MaskIndex - NumElts]; 5004 } 5005 5006 if (SwapOps) { 5007 std::swap(V1, V2); 5008 std::swap(GatherIndicesLHS, GatherIndicesRHS); 5009 } 5010 5011 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 5012 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 5013 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 5014 5015 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; 5016 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; 5017 MVT IndexVT = VT.changeTypeToInteger(); 5018 // Since we can't introduce illegal index types at this stage, use i16 and 5019 // vrgatherei16 if the corresponding index type for plain vrgather is greater 5020 // than XLenVT. 5021 if (IndexVT.getScalarType().bitsGT(XLenVT)) { 5022 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 5023 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 5024 } 5025 5026 // If the mask allows, we can do all the index computation in 16 bits. This 5027 // requires less work and less register pressure at high LMUL, and creates 5028 // smaller constants which may be cheaper to materialize. 5029 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) && 5030 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { 5031 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 5032 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 5033 } 5034 5035 MVT IndexContainerVT = 5036 ContainerVT.changeVectorElementType(IndexVT.getScalarType()); 5037 5038 SDValue Gather; 5039 // TODO: This doesn't trigger for i64 vectors on RV32, since there we 5040 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
5041 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { 5042 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, 5043 Subtarget); 5044 } else { 5045 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 5046 // If only one index is used, we can use a "splat" vrgather. 5047 // TODO: We can splat the most-common index and fix-up any stragglers, if 5048 // that's beneficial. 5049 if (LHSIndexCounts.size() == 1) { 5050 int SplatIndex = LHSIndexCounts.begin()->getFirst(); 5051 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, 5052 DAG.getConstant(SplatIndex, DL, XLenVT), 5053 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5054 } else { 5055 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 5056 LHSIndices = 5057 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); 5058 5059 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 5060 DAG.getUNDEF(ContainerVT), TrueMask, VL); 5061 } 5062 } 5063 5064 // If a second vector operand is used by this shuffle, blend it in with an 5065 // additional vrgather. 5066 if (!V2.isUndef()) { 5067 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 5068 5069 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 5070 SelectMask = 5071 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); 5072 5073 // If only one index is used, we can use a "splat" vrgather. 5074 // TODO: We can splat the most-common index and fix-up any stragglers, if 5075 // that's beneficial. 5076 if (RHSIndexCounts.size() == 1) { 5077 int SplatIndex = RHSIndexCounts.begin()->getFirst(); 5078 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, 5079 DAG.getConstant(SplatIndex, DL, XLenVT), Gather, 5080 SelectMask, VL); 5081 } else { 5082 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); 5083 RHSIndices = 5084 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); 5085 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, 5086 SelectMask, VL); 5087 } 5088 } 5089 5090 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 5091} 5092 5093bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 5094 // Support splats for any type. These should type legalize well. 5095 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 5096 return true; 5097 5098 // Only support legal VTs for other shuffles for now. 5099 if (!isTypeLegal(VT)) 5100 return false; 5101 5102 MVT SVT = VT.getSimpleVT(); 5103 5104 // Not for i1 vectors. 5105 if (SVT.getScalarType() == MVT::i1) 5106 return false; 5107 5108 int Dummy1, Dummy2; 5109 return (isElementRotate(Dummy1, Dummy2, M) > 0) || 5110 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); 5111} 5112 5113// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 5114// the exponent. 5115SDValue 5116RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, 5117 SelectionDAG &DAG) const { 5118 MVT VT = Op.getSimpleValueType(); 5119 unsigned EltSize = VT.getScalarSizeInBits(); 5120 SDValue Src = Op.getOperand(0); 5121 SDLoc DL(Op); 5122 MVT ContainerVT = VT; 5123 5124 SDValue Mask, VL; 5125 if (Op->isVPOpcode()) { 5126 Mask = Op.getOperand(1); 5127 if (VT.isFixedLengthVector()) 5128 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5129 Subtarget); 5130 VL = Op.getOperand(2); 5131 } 5132 5133 // We choose FP type that can represent the value if possible. 
Otherwise, we 5134 // use rounding to zero conversion for correct exponent of the result. 5135 // TODO: Use f16 for i8 when possible? 5136 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; 5137 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) 5138 FloatEltVT = MVT::f32; 5139 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 5140 5141 // Legal types should have been checked in the RISCVTargetLowering 5142 // constructor. 5143 // TODO: Splitting may make sense in some cases. 5144 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 5145 "Expected legal float type!"); 5146 5147 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 5148 // The trailing zero count is equal to log2 of this single bit value. 5149 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 5150 SDValue Neg = DAG.getNegative(Src, DL, VT); 5151 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 5152 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { 5153 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), 5154 Src, Mask, VL); 5155 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL); 5156 } 5157 5158 // We have a legal FP type, convert to it. 5159 SDValue FloatVal; 5160 if (FloatVT.bitsGT(VT)) { 5161 if (Op->isVPOpcode()) 5162 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL); 5163 else 5164 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 5165 } else { 5166 // Use RTZ to avoid rounding influencing exponent of FloatVal. 5167 if (VT.isFixedLengthVector()) { 5168 ContainerVT = getContainerForFixedLengthVector(VT); 5169 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 5170 } 5171 if (!Op->isVPOpcode()) 5172 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5173 SDValue RTZRM = 5174 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); 5175 MVT ContainerFloatVT = 5176 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); 5177 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, 5178 Src, Mask, RTZRM, VL); 5179 if (VT.isFixedLengthVector()) 5180 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); 5181 } 5182 // Bitcast to integer and shift the exponent to the LSB. 5183 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 5184 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 5185 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 5186 5187 SDValue Exp; 5188 // Restore back to original type. Truncation after SRL is to generate vnsrl. 5189 if (Op->isVPOpcode()) { 5190 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast, 5191 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL); 5192 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL); 5193 } else { 5194 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 5195 DAG.getConstant(ShiftAmt, DL, IntVT)); 5196 if (IntVT.bitsLT(VT)) 5197 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); 5198 else if (IntVT.bitsGT(VT)) 5199 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); 5200 } 5201 5202 // The exponent contains log2 of the value in biased form. 5203 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 5204 // For trailing zeros, we just need to subtract the bias. 
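// For example, with 32-bit elements the conversion uses f64 (bias 1023):
// for input 8, 8 & -8 = 8 converts to 8.0, whose biased exponent is 1026,
// and 1026 - 1023 = 3 = cttz(8).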
5205 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 5206 return DAG.getNode(ISD::SUB, DL, VT, Exp, 5207 DAG.getConstant(ExponentBias, DL, VT)); 5208 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) 5209 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp, 5210 DAG.getConstant(ExponentBias, DL, VT), Mask, VL); 5211 5212 // For leading zeros, we need to remove the bias and convert from log2 to 5213 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 5214 unsigned Adjust = ExponentBias + (EltSize - 1); 5215 SDValue Res; 5216 if (Op->isVPOpcode()) 5217 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp, 5218 Mask, VL); 5219 else 5220 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); 5221 5222 // The above result with zero input equals to Adjust which is greater than 5223 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 5224 if (Op.getOpcode() == ISD::CTLZ) 5225 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); 5226 else if (Op.getOpcode() == ISD::VP_CTLZ) 5227 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, 5228 DAG.getConstant(EltSize, DL, VT), Mask, VL); 5229 return Res; 5230} 5231 5232// While RVV has alignment restrictions, we should always be able to load as a 5233// legal equivalently-sized byte-typed vector instead. This method is 5234// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 5235// the load is already correctly-aligned, it returns SDValue(). 5236SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 5237 SelectionDAG &DAG) const { 5238 auto *Load = cast<LoadSDNode>(Op); 5239 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 5240 5241 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5242 Load->getMemoryVT(), 5243 *Load->getMemOperand())) 5244 return SDValue(); 5245 5246 SDLoc DL(Op); 5247 MVT VT = Op.getSimpleValueType(); 5248 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5249 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5250 "Unexpected unaligned RVV load type"); 5251 MVT NewVT = 5252 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5253 assert(NewVT.isValid() && 5254 "Expecting equally-sized RVV vector types to be legal"); 5255 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 5256 Load->getPointerInfo(), Load->getOriginalAlign(), 5257 Load->getMemOperand()->getFlags()); 5258 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 5259} 5260 5261// While RVV has alignment restrictions, we should always be able to store as a 5262// legal equivalently-sized byte-typed vector instead. This method is 5263// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 5264// returns SDValue() if the store is already correctly aligned. 
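// For example, an underaligned store of <vscale x 4 x i16> is re-expressed
// as a store of the value bitcast to <vscale x 8 x i8>.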
5265SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 5266 SelectionDAG &DAG) const { 5267 auto *Store = cast<StoreSDNode>(Op); 5268 assert(Store && Store->getValue().getValueType().isVector() && 5269 "Expected vector store"); 5270 5271 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5272 Store->getMemoryVT(), 5273 *Store->getMemOperand())) 5274 return SDValue(); 5275 5276 SDLoc DL(Op); 5277 SDValue StoredVal = Store->getValue(); 5278 MVT VT = StoredVal.getSimpleValueType(); 5279 unsigned EltSizeBits = VT.getScalarSizeInBits(); 5280 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 5281 "Unexpected unaligned RVV store type"); 5282 MVT NewVT = 5283 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 5284 assert(NewVT.isValid() && 5285 "Expecting equally-sized RVV vector types to be legal"); 5286 StoredVal = DAG.getBitcast(NewVT, StoredVal); 5287 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 5288 Store->getPointerInfo(), Store->getOriginalAlign(), 5289 Store->getMemOperand()->getFlags()); 5290} 5291 5292static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, 5293 const RISCVSubtarget &Subtarget) { 5294 assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); 5295 5296 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); 5297 5298 // All simm32 constants should be handled by isel. 5299 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 5300 // this check redundant, but small immediates are common so this check 5301 // should have better compile time. 5302 if (isInt<32>(Imm)) 5303 return Op; 5304 5305 // We only need to cost the immediate, if constant pool lowering is enabled. 5306 if (!Subtarget.useConstantPoolForLargeInts()) 5307 return Op; 5308 5309 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 5310 if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) 5311 return Op; 5312 5313 // Optimizations below are disabled for opt size. If we're optimizing for 5314 // size, use a constant pool. 5315 if (DAG.shouldOptForSize()) 5316 return SDValue(); 5317 5318 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do 5319 // that if it will avoid a constant pool. 5320 // It will require an extra temporary register though. 5321 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 5322 // low and high 32 bits are the same and bit 31 and 63 are set. 5323 unsigned ShiftAmt, AddOpc; 5324 RISCVMatInt::InstSeq SeqLo = 5325 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 5326 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) 5327 return Op; 5328 5329 return SDValue(); 5330} 5331 5332static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 5333 const RISCVSubtarget &Subtarget) { 5334 SDLoc dl(Op); 5335 AtomicOrdering FenceOrdering = 5336 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 5337 SyncScope::ID FenceSSID = 5338 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 5339 5340 if (Subtarget.hasStdExtZtso()) { 5341 // The only fence that needs an instruction is a sequentially-consistent 5342 // cross-thread fence. 5343 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 5344 FenceSSID == SyncScope::System) 5345 return Op; 5346 5347 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
5348 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5349 } 5350 5351 // singlethread fences only synchronize with signal handlers on the same 5352 // thread and thus only need to preserve instruction order, not actually 5353 // enforce memory ordering. 5354 if (FenceSSID == SyncScope::SingleThread) 5355 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 5356 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 5357 5358 return Op; 5359} 5360 5361SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, 5362 SelectionDAG &DAG) const { 5363 SDLoc DL(Op); 5364 MVT VT = Op.getSimpleValueType(); 5365 MVT XLenVT = Subtarget.getXLenVT(); 5366 unsigned Check = Op.getConstantOperandVal(1); 5367 unsigned TDCMask = 0; 5368 if (Check & fcSNan) 5369 TDCMask |= RISCV::FPMASK_Signaling_NaN; 5370 if (Check & fcQNan) 5371 TDCMask |= RISCV::FPMASK_Quiet_NaN; 5372 if (Check & fcPosInf) 5373 TDCMask |= RISCV::FPMASK_Positive_Infinity; 5374 if (Check & fcNegInf) 5375 TDCMask |= RISCV::FPMASK_Negative_Infinity; 5376 if (Check & fcPosNormal) 5377 TDCMask |= RISCV::FPMASK_Positive_Normal; 5378 if (Check & fcNegNormal) 5379 TDCMask |= RISCV::FPMASK_Negative_Normal; 5380 if (Check & fcPosSubnormal) 5381 TDCMask |= RISCV::FPMASK_Positive_Subnormal; 5382 if (Check & fcNegSubnormal) 5383 TDCMask |= RISCV::FPMASK_Negative_Subnormal; 5384 if (Check & fcPosZero) 5385 TDCMask |= RISCV::FPMASK_Positive_Zero; 5386 if (Check & fcNegZero) 5387 TDCMask |= RISCV::FPMASK_Negative_Zero; 5388 5389 bool IsOneBitMask = isPowerOf2_32(TDCMask); 5390 5391 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT); 5392 5393 if (VT.isVector()) { 5394 SDValue Op0 = Op.getOperand(0); 5395 MVT VT0 = Op.getOperand(0).getSimpleValueType(); 5396 5397 if (VT.isScalableVector()) { 5398 MVT DstVT = VT0.changeVectorElementTypeToInteger(); 5399 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); 5400 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5401 Mask = Op.getOperand(2); 5402 VL = Op.getOperand(3); 5403 } 5404 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, 5405 VL, Op->getFlags()); 5406 if (IsOneBitMask) 5407 return DAG.getSetCC(DL, VT, FPCLASS, 5408 DAG.getConstant(TDCMask, DL, DstVT), 5409 ISD::CondCode::SETEQ); 5410 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS, 5411 DAG.getConstant(TDCMask, DL, DstVT)); 5412 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT), 5413 ISD::SETNE); 5414 } 5415 5416 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0); 5417 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5418 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); 5419 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); 5420 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { 5421 Mask = Op.getOperand(2); 5422 MVT MaskContainerVT = 5423 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 5424 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 5425 VL = Op.getOperand(3); 5426 } 5427 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); 5428 5429 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, 5430 Mask, VL, Op->getFlags()); 5431 5432 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 5433 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL); 5434 if (IsOneBitMask) { 5435 SDValue VMSEQ = 5436 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5437 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ), 5438 DAG.getUNDEF(ContainerVT), 
Mask, VL}); 5439 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget); 5440 } 5441 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, 5442 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); 5443 5444 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 5445 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, 5446 DAG.getUNDEF(ContainerDstVT), SplatZero, VL); 5447 5448 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 5449 {AND, SplatZero, DAG.getCondCode(ISD::SETNE), 5450 DAG.getUNDEF(ContainerVT), Mask, VL}); 5451 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); 5452 } 5453 5454 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0)); 5455 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV); 5456 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT), 5457 ISD::CondCode::SETNE); 5458 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 5459} 5460 5461// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 5462// operations propagate nans. 5463static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, 5464 const RISCVSubtarget &Subtarget) { 5465 SDLoc DL(Op); 5466 MVT VT = Op.getSimpleValueType(); 5467 5468 SDValue X = Op.getOperand(0); 5469 SDValue Y = Op.getOperand(1); 5470 5471 if (!VT.isVector()) { 5472 MVT XLenVT = Subtarget.getXLenVT(); 5473 5474 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 5475 // ensures that when one input is a nan, the other will also be a nan 5476 // allowing the nan to propagate. If both inputs are nan, this will swap the 5477 // inputs which is harmless. 5478 5479 SDValue NewY = Y; 5480 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { 5481 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); 5482 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); 5483 } 5484 5485 SDValue NewX = X; 5486 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { 5487 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); 5488 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); 5489 } 5490 5491 unsigned Opc = 5492 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; 5493 return DAG.getNode(Opc, DL, VT, NewX, NewY); 5494 } 5495 5496 // Check no NaNs before converting to fixed vector scalable. 
5497 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); 5498 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); 5499 5500 MVT ContainerVT = VT; 5501 if (VT.isFixedLengthVector()) { 5502 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 5503 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 5504 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); 5505 } 5506 5507 SDValue Mask, VL; 5508 if (Op->isVPOpcode()) { 5509 Mask = Op.getOperand(2); 5510 if (VT.isFixedLengthVector()) 5511 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 5512 Subtarget); 5513 VL = Op.getOperand(3); 5514 } else { 5515 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5516 } 5517 5518 SDValue NewY = Y; 5519 if (!XIsNeverNan) { 5520 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5521 {X, X, DAG.getCondCode(ISD::SETOEQ), 5522 DAG.getUNDEF(ContainerVT), Mask, VL}); 5523 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X, 5524 DAG.getUNDEF(ContainerVT), VL); 5525 } 5526 5527 SDValue NewX = X; 5528 if (!YIsNeverNan) { 5529 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), 5530 {Y, Y, DAG.getCondCode(ISD::SETOEQ), 5531 DAG.getUNDEF(ContainerVT), Mask, VL}); 5532 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y, 5533 DAG.getUNDEF(ContainerVT), VL); 5534 } 5535 5536 unsigned Opc = 5537 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM 5538 ? RISCVISD::VFMAX_VL 5539 : RISCVISD::VFMIN_VL; 5540 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, 5541 DAG.getUNDEF(ContainerVT), Mask, VL); 5542 if (VT.isFixedLengthVector()) 5543 Res = convertFromScalableVector(VT, Res, DAG, Subtarget); 5544 return Res; 5545} 5546 5547/// Get a RISC-V target specified VL op for a given SDNode. 
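/// For example, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL, and
/// ISD::FMA maps to RISCVISD::VFMADD_VL.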
5548static unsigned getRISCVVLOp(SDValue Op) { 5549#define OP_CASE(NODE) \ 5550 case ISD::NODE: \ 5551 return RISCVISD::NODE##_VL; 5552#define VP_CASE(NODE) \ 5553 case ISD::VP_##NODE: \ 5554 return RISCVISD::NODE##_VL; 5555 // clang-format off 5556 switch (Op.getOpcode()) { 5557 default: 5558 llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); 5559 OP_CASE(ADD) 5560 OP_CASE(SUB) 5561 OP_CASE(MUL) 5562 OP_CASE(MULHS) 5563 OP_CASE(MULHU) 5564 OP_CASE(SDIV) 5565 OP_CASE(SREM) 5566 OP_CASE(UDIV) 5567 OP_CASE(UREM) 5568 OP_CASE(SHL) 5569 OP_CASE(SRA) 5570 OP_CASE(SRL) 5571 OP_CASE(ROTL) 5572 OP_CASE(ROTR) 5573 OP_CASE(BSWAP) 5574 OP_CASE(CTTZ) 5575 OP_CASE(CTLZ) 5576 OP_CASE(CTPOP) 5577 OP_CASE(BITREVERSE) 5578 OP_CASE(SADDSAT) 5579 OP_CASE(UADDSAT) 5580 OP_CASE(SSUBSAT) 5581 OP_CASE(USUBSAT) 5582 OP_CASE(AVGFLOORU) 5583 OP_CASE(AVGCEILU) 5584 OP_CASE(FADD) 5585 OP_CASE(FSUB) 5586 OP_CASE(FMUL) 5587 OP_CASE(FDIV) 5588 OP_CASE(FNEG) 5589 OP_CASE(FABS) 5590 OP_CASE(FSQRT) 5591 OP_CASE(SMIN) 5592 OP_CASE(SMAX) 5593 OP_CASE(UMIN) 5594 OP_CASE(UMAX) 5595 OP_CASE(STRICT_FADD) 5596 OP_CASE(STRICT_FSUB) 5597 OP_CASE(STRICT_FMUL) 5598 OP_CASE(STRICT_FDIV) 5599 OP_CASE(STRICT_FSQRT) 5600 VP_CASE(ADD) // VP_ADD 5601 VP_CASE(SUB) // VP_SUB 5602 VP_CASE(MUL) // VP_MUL 5603 VP_CASE(SDIV) // VP_SDIV 5604 VP_CASE(SREM) // VP_SREM 5605 VP_CASE(UDIV) // VP_UDIV 5606 VP_CASE(UREM) // VP_UREM 5607 VP_CASE(SHL) // VP_SHL 5608 VP_CASE(FADD) // VP_FADD 5609 VP_CASE(FSUB) // VP_FSUB 5610 VP_CASE(FMUL) // VP_FMUL 5611 VP_CASE(FDIV) // VP_FDIV 5612 VP_CASE(FNEG) // VP_FNEG 5613 VP_CASE(FABS) // VP_FABS 5614 VP_CASE(SMIN) // VP_SMIN 5615 VP_CASE(SMAX) // VP_SMAX 5616 VP_CASE(UMIN) // VP_UMIN 5617 VP_CASE(UMAX) // VP_UMAX 5618 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN 5619 VP_CASE(SETCC) // VP_SETCC 5620 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP 5621 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP 5622 VP_CASE(BITREVERSE) // VP_BITREVERSE 5623 VP_CASE(BSWAP) // VP_BSWAP 5624 VP_CASE(CTLZ) // VP_CTLZ 5625 VP_CASE(CTTZ) // VP_CTTZ 5626 VP_CASE(CTPOP) // VP_CTPOP 5627 case ISD::CTLZ_ZERO_UNDEF: 5628 case ISD::VP_CTLZ_ZERO_UNDEF: 5629 return RISCVISD::CTLZ_VL; 5630 case ISD::CTTZ_ZERO_UNDEF: 5631 case ISD::VP_CTTZ_ZERO_UNDEF: 5632 return RISCVISD::CTTZ_VL; 5633 case ISD::FMA: 5634 case ISD::VP_FMA: 5635 return RISCVISD::VFMADD_VL; 5636 case ISD::STRICT_FMA: 5637 return RISCVISD::STRICT_VFMADD_VL; 5638 case ISD::AND: 5639 case ISD::VP_AND: 5640 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5641 return RISCVISD::VMAND_VL; 5642 return RISCVISD::AND_VL; 5643 case ISD::OR: 5644 case ISD::VP_OR: 5645 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5646 return RISCVISD::VMOR_VL; 5647 return RISCVISD::OR_VL; 5648 case ISD::XOR: 5649 case ISD::VP_XOR: 5650 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) 5651 return RISCVISD::VMXOR_VL; 5652 return RISCVISD::XOR_VL; 5653 case ISD::VP_SELECT: 5654 case ISD::VP_MERGE: 5655 return RISCVISD::VMERGE_VL; 5656 case ISD::VP_ASHR: 5657 return RISCVISD::SRA_VL; 5658 case ISD::VP_LSHR: 5659 return RISCVISD::SRL_VL; 5660 case ISD::VP_SQRT: 5661 return RISCVISD::FSQRT_VL; 5662 case ISD::VP_SIGN_EXTEND: 5663 return RISCVISD::VSEXT_VL; 5664 case ISD::VP_ZERO_EXTEND: 5665 return RISCVISD::VZEXT_VL; 5666 case ISD::VP_FP_TO_SINT: 5667 return RISCVISD::VFCVT_RTZ_X_F_VL; 5668 case ISD::VP_FP_TO_UINT: 5669 return RISCVISD::VFCVT_RTZ_XU_F_VL; 5670 case ISD::FMINNUM: 5671 case ISD::VP_FMINNUM: 5672 return RISCVISD::VFMIN_VL; 5673 case ISD::FMAXNUM: 5674 case 
ISD::VP_FMAXNUM: 5675 return RISCVISD::VFMAX_VL; 5676 } 5677 // clang-format on 5678#undef OP_CASE 5679#undef VP_CASE 5680} 5681 5682/// Return true if a RISC-V target specified op has a merge operand. 5683static bool hasMergeOp(unsigned Opcode) { 5684 assert(Opcode > RISCVISD::FIRST_NUMBER && 5685 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 5686 "not a RISC-V target specific op"); 5687 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 5688 126 && 5689 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 5690 ISD::FIRST_TARGET_STRICTFP_OPCODE == 5691 21 && 5692 "adding target specific op should update this function"); 5693 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) 5694 return true; 5695 if (Opcode == RISCVISD::FCOPYSIGN_VL) 5696 return true; 5697 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) 5698 return true; 5699 if (Opcode == RISCVISD::SETCC_VL) 5700 return true; 5701 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) 5702 return true; 5703 if (Opcode == RISCVISD::VMERGE_VL) 5704 return true; 5705 return false; 5706} 5707 5708/// Return true if a RISC-V target specified op has a mask operand. 5709static bool hasMaskOp(unsigned Opcode) { 5710 assert(Opcode > RISCVISD::FIRST_NUMBER && 5711 Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && 5712 "not a RISC-V target specific op"); 5713 static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 5714 126 && 5715 RISCVISD::LAST_RISCV_STRICTFP_OPCODE - 5716 ISD::FIRST_TARGET_STRICTFP_OPCODE == 5717 21 && 5718 "adding target specific op should update this function"); 5719 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) 5720 return true; 5721 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) 5722 return true; 5723 if (Opcode >= RISCVISD::STRICT_FADD_VL && 5724 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) 5725 return true; 5726 return false; 5727} 5728 5729static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { 5730 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 5731 SDLoc DL(Op); 5732 5733 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5734 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5735 5736 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5737 if (!Op.getOperand(j).getValueType().isVector()) { 5738 LoOperands[j] = Op.getOperand(j); 5739 HiOperands[j] = Op.getOperand(j); 5740 continue; 5741 } 5742 std::tie(LoOperands[j], HiOperands[j]) = 5743 DAG.SplitVector(Op.getOperand(j), DL); 5744 } 5745 5746 SDValue LoRes = 5747 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 5748 SDValue HiRes = 5749 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 5750 5751 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 5752} 5753 5754static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { 5755 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op"); 5756 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); 5757 SDLoc DL(Op); 5758 5759 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5760 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5761 5762 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5763 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) { 5764 std::tie(LoOperands[j], HiOperands[j]) = 5765 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL); 5766 continue; 5767 } 5768 if (!Op.getOperand(j).getValueType().isVector()) { 5769 LoOperands[j] = 
Op.getOperand(j); 5770 HiOperands[j] = Op.getOperand(j); 5771 continue; 5772 } 5773 std::tie(LoOperands[j], HiOperands[j]) = 5774 DAG.SplitVector(Op.getOperand(j), DL); 5775 } 5776 5777 SDValue LoRes = 5778 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); 5779 SDValue HiRes = 5780 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); 5781 5782 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); 5783} 5784 5785static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { 5786 SDLoc DL(Op); 5787 5788 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL); 5789 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL); 5790 auto [EVLLo, EVLHi] = 5791 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL); 5792 5793 SDValue ResLo = 5794 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 5795 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags()); 5796 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 5797 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags()); 5798} 5799 5800static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { 5801 5802 assert(Op->isStrictFPOpcode()); 5803 5804 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0)); 5805 5806 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1)); 5807 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1)); 5808 5809 SDLoc DL(Op); 5810 5811 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); 5812 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); 5813 5814 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 5815 if (!Op.getOperand(j).getValueType().isVector()) { 5816 LoOperands[j] = Op.getOperand(j); 5817 HiOperands[j] = Op.getOperand(j); 5818 continue; 5819 } 5820 std::tie(LoOperands[j], HiOperands[j]) = 5821 DAG.SplitVector(Op.getOperand(j), DL); 5822 } 5823 5824 SDValue LoRes = 5825 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags()); 5826 HiOperands[0] = LoRes.getValue(1); 5827 SDValue HiRes = 5828 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags()); 5829 5830 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0), 5831 LoRes.getValue(0), HiRes.getValue(0)); 5832 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); 5833} 5834 5835SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 5836 SelectionDAG &DAG) const { 5837 switch (Op.getOpcode()) { 5838 default: 5839 report_fatal_error("unimplemented operand"); 5840 case ISD::ATOMIC_FENCE: 5841 return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5842 case ISD::GlobalAddress: 5843 return lowerGlobalAddress(Op, DAG); 5844 case ISD::BlockAddress: 5845 return lowerBlockAddress(Op, DAG); 5846 case ISD::ConstantPool: 5847 return lowerConstantPool(Op, DAG); 5848 case ISD::JumpTable: 5849 return lowerJumpTable(Op, DAG); 5850 case ISD::GlobalTLSAddress: 5851 return lowerGlobalTLSAddress(Op, DAG); 5852 case ISD::Constant: 5853 return lowerConstant(Op, DAG, Subtarget); 5854 case ISD::SELECT: 5855 return lowerSELECT(Op, DAG); 5856 case ISD::BRCOND: 5857 return lowerBRCOND(Op, DAG); 5858 case ISD::VASTART: 5859 return lowerVASTART(Op, DAG); 5860 case ISD::FRAMEADDR: 5861 return lowerFRAMEADDR(Op, DAG); 5862 case ISD::RETURNADDR: 5863 return lowerRETURNADDR(Op, DAG); 5864 case ISD::SHL_PARTS: 5865 return lowerShiftLeftParts(Op, DAG); 5866 case ISD::SRA_PARTS: 5867 return lowerShiftRightParts(Op, DAG, true); 5868 case ISD::SRL_PARTS: 5869 return lowerShiftRightParts(Op, DAG, false); 5870 case ISD::ROTL: 5871 case ISD::ROTR: 5872 if 
(Op.getValueType().isFixedLengthVector()) { 5873 assert(Subtarget.hasStdExtZvkb()); 5874 return lowerToScalableOp(Op, DAG); 5875 } 5876 assert(Subtarget.hasVendorXTHeadBb() && 5877 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && 5878 "Unexpected custom legalization"); 5879 // XTHeadBb only supports rotate by constant. 5880 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5881 return SDValue(); 5882 return Op; 5883 case ISD::BITCAST: { 5884 SDLoc DL(Op); 5885 EVT VT = Op.getValueType(); 5886 SDValue Op0 = Op.getOperand(0); 5887 EVT Op0VT = Op0.getValueType(); 5888 MVT XLenVT = Subtarget.getXLenVT(); 5889 if (VT == MVT::f16 && Op0VT == MVT::i16 && 5890 Subtarget.hasStdExtZfhminOrZhinxmin()) { 5891 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 5892 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 5893 return FPConv; 5894 } 5895 if (VT == MVT::bf16 && Op0VT == MVT::i16 && 5896 Subtarget.hasStdExtZfbfmin()) { 5897 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 5898 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); 5899 return FPConv; 5900 } 5901 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 5902 Subtarget.hasStdExtFOrZfinx()) { 5903 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5904 SDValue FPConv = 5905 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 5906 return FPConv; 5907 } 5908 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 && 5909 Subtarget.hasStdExtZfa()) { 5910 SDValue Lo, Hi; 5911 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); 5912 SDValue RetReg = 5913 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 5914 return RetReg; 5915 } 5916 5917 // Consider other scalar<->scalar casts as legal if the types are legal. 5918 // Otherwise expand them. 5919 if (!VT.isVector() && !Op0VT.isVector()) { 5920 if (isTypeLegal(VT) && isTypeLegal(Op0VT)) 5921 return Op; 5922 return SDValue(); 5923 } 5924 5925 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && 5926 "Unexpected types"); 5927 5928 if (VT.isFixedLengthVector()) { 5929 // We can handle fixed length vector bitcasts with a simple replacement 5930 // in isel. 5931 if (Op0VT.isFixedLengthVector()) 5932 return Op; 5933 // When bitcasting from scalar to fixed-length vector, insert the scalar 5934 // into a one-element vector of the result type, and perform a vector 5935 // bitcast. 5936 if (!Op0VT.isVector()) { 5937 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 5938 if (!isTypeLegal(BVT)) 5939 return SDValue(); 5940 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 5941 DAG.getUNDEF(BVT), Op0, 5942 DAG.getConstant(0, DL, XLenVT))); 5943 } 5944 return SDValue(); 5945 } 5946 // Custom-legalize bitcasts from fixed-length vector types to scalar types 5947 // thus: bitcast the vector to a one-element vector type whose element type 5948 // is the same as the result type, and extract the first element. 
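// For example, (i64 (bitcast v4i16 X)) becomes
// (extract_vector_elt (v1i64 (bitcast X)), 0).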
5949 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 5950 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 5951 if (!isTypeLegal(BVT)) 5952 return SDValue(); 5953 SDValue BVec = DAG.getBitcast(BVT, Op0); 5954 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 5955 DAG.getConstant(0, DL, XLenVT)); 5956 } 5957 return SDValue(); 5958 } 5959 case ISD::INTRINSIC_WO_CHAIN: 5960 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5961 case ISD::INTRINSIC_W_CHAIN: 5962 return LowerINTRINSIC_W_CHAIN(Op, DAG); 5963 case ISD::INTRINSIC_VOID: 5964 return LowerINTRINSIC_VOID(Op, DAG); 5965 case ISD::IS_FPCLASS: 5966 return LowerIS_FPCLASS(Op, DAG); 5967 case ISD::BITREVERSE: { 5968 MVT VT = Op.getSimpleValueType(); 5969 if (VT.isFixedLengthVector()) { 5970 assert(Subtarget.hasStdExtZvbb()); 5971 return lowerToScalableOp(Op, DAG); 5972 } 5973 SDLoc DL(Op); 5974 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); 5975 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); 5976 // Expand bitreverse to a bswap(rev8) followed by brev8. 5977 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); 5978 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); 5979 } 5980 case ISD::TRUNCATE: 5981 // Only custom-lower vector truncates 5982 if (!Op.getSimpleValueType().isVector()) 5983 return Op; 5984 return lowerVectorTruncLike(Op, DAG); 5985 case ISD::ANY_EXTEND: 5986 case ISD::ZERO_EXTEND: 5987 if (Op.getOperand(0).getValueType().isVector() && 5988 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 5989 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 5990 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 5991 case ISD::SIGN_EXTEND: 5992 if (Op.getOperand(0).getValueType().isVector() && 5993 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 5994 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 5995 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 5996 case ISD::SPLAT_VECTOR_PARTS: 5997 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 5998 case ISD::INSERT_VECTOR_ELT: 5999 return lowerINSERT_VECTOR_ELT(Op, DAG); 6000 case ISD::EXTRACT_VECTOR_ELT: 6001 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6002 case ISD::SCALAR_TO_VECTOR: { 6003 MVT VT = Op.getSimpleValueType(); 6004 SDLoc DL(Op); 6005 SDValue Scalar = Op.getOperand(0); 6006 if (VT.getVectorElementType() == MVT::i1) { 6007 MVT WideVT = VT.changeVectorElementType(MVT::i8); 6008 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); 6009 return DAG.getNode(ISD::TRUNCATE, DL, VT, V); 6010 } 6011 MVT ContainerVT = VT; 6012 if (VT.isFixedLengthVector()) 6013 ContainerVT = getContainerForFixedLengthVector(VT); 6014 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 6015 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar); 6016 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, 6017 DAG.getUNDEF(ContainerVT), Scalar, VL); 6018 if (VT.isFixedLengthVector()) 6019 V = convertFromScalableVector(VT, V, DAG, Subtarget); 6020 return V; 6021 } 6022 case ISD::VSCALE: { 6023 MVT XLenVT = Subtarget.getXLenVT(); 6024 MVT VT = Op.getSimpleValueType(); 6025 SDLoc DL(Op); 6026 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); 6027 // We define our scalable vector types for lmul=1 to use a 64 bit known 6028 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 6029 // vscale as VLENB / 8. 
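// For example, with VLEN=128 the register holds two 64-bit blocks, so VLENB
// is 16 and vscale = 16 / 8 = 2.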
6030 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); 6031 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 6032 report_fatal_error("Support for VLEN==32 is incomplete."); 6033 // We assume VLENB is a multiple of 8. We manually choose the best shift 6034 // here because SimplifyDemandedBits isn't always able to simplify it. 6035 uint64_t Val = Op.getConstantOperandVal(0); 6036 if (isPowerOf2_64(Val)) { 6037 uint64_t Log2 = Log2_64(Val); 6038 if (Log2 < 3) 6039 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6040 DAG.getConstant(3 - Log2, DL, VT)); 6041 else if (Log2 > 3) 6042 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res, 6043 DAG.getConstant(Log2 - 3, DL, XLenVT)); 6044 } else if ((Val % 8) == 0) { 6045 // If the multiplier is a multiple of 8, scale it down to avoid needing 6046 // to shift the VLENB value. 6047 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res, 6048 DAG.getConstant(Val / 8, DL, XLenVT)); 6049 } else { 6050 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res, 6051 DAG.getConstant(3, DL, XLenVT)); 6052 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale, 6053 DAG.getConstant(Val, DL, XLenVT)); 6054 } 6055 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); 6056 } 6057 case ISD::FPOWI: { 6058 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 6059 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 6060 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 6061 Op.getOperand(1).getValueType() == MVT::i32) { 6062 SDLoc DL(Op); 6063 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6064 SDValue Powi = 6065 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 6066 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 6067 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6068 } 6069 return SDValue(); 6070 } 6071 case ISD::FMAXIMUM: 6072 case ISD::FMINIMUM: 6073 if (Op.getValueType() == MVT::nxv32f16 && 6074 (Subtarget.hasVInstructionsF16Minimal() && 6075 !Subtarget.hasVInstructionsF16())) 6076 return SplitVectorOp(Op, DAG); 6077 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 6078 case ISD::FP_EXTEND: { 6079 SDLoc DL(Op); 6080 EVT VT = Op.getValueType(); 6081 SDValue Op0 = Op.getOperand(0); 6082 EVT Op0VT = Op0.getValueType(); 6083 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) 6084 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6085 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { 6086 SDValue FloatVal = 6087 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); 6088 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); 6089 } 6090 6091 if (!Op.getValueType().isVector()) 6092 return Op; 6093 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6094 } 6095 case ISD::FP_ROUND: { 6096 SDLoc DL(Op); 6097 EVT VT = Op.getValueType(); 6098 SDValue Op0 = Op.getOperand(0); 6099 EVT Op0VT = Op0.getValueType(); 6100 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) 6101 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); 6102 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && 6103 Subtarget.hasStdExtDOrZdinx()) { 6104 SDValue FloatVal = 6105 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, 6106 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6107 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); 6108 } 6109 6110 if (!Op.getValueType().isVector()) 6111 return Op; 6112 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6113 } 6114 case 
ISD::STRICT_FP_ROUND: 6115 case ISD::STRICT_FP_EXTEND: 6116 return lowerStrictFPExtendOrRoundLike(Op, DAG); 6117 case ISD::SINT_TO_FP: 6118 case ISD::UINT_TO_FP: 6119 if (Op.getValueType().isVector() && 6120 Op.getValueType().getScalarType() == MVT::f16 && 6121 (Subtarget.hasVInstructionsF16Minimal() && 6122 !Subtarget.hasVInstructionsF16())) { 6123 if (Op.getValueType() == MVT::nxv32f16) 6124 return SplitVectorOp(Op, DAG); 6125 // int -> f32 6126 SDLoc DL(Op); 6127 MVT NVT = 6128 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 6129 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 6130 // f32 -> f16 6131 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 6132 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6133 } 6134 [[fallthrough]]; 6135 case ISD::FP_TO_SINT: 6136 case ISD::FP_TO_UINT: 6137 if (SDValue Op1 = Op.getOperand(0); 6138 Op1.getValueType().isVector() && 6139 Op1.getValueType().getScalarType() == MVT::f16 && 6140 (Subtarget.hasVInstructionsF16Minimal() && 6141 !Subtarget.hasVInstructionsF16())) { 6142 if (Op1.getValueType() == MVT::nxv32f16) 6143 return SplitVectorOp(Op, DAG); 6144 // f16 -> f32 6145 SDLoc DL(Op); 6146 MVT NVT = MVT::getVectorVT(MVT::f32, 6147 Op1.getValueType().getVectorElementCount()); 6148 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 6149 // f32 -> int 6150 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec); 6151 } 6152 [[fallthrough]]; 6153 case ISD::STRICT_FP_TO_SINT: 6154 case ISD::STRICT_FP_TO_UINT: 6155 case ISD::STRICT_SINT_TO_FP: 6156 case ISD::STRICT_UINT_TO_FP: { 6157 // RVV can only do fp<->int conversions to types half/double the size as 6158 // the source. We custom-lower any conversions that do two hops into 6159 // sequences. 6160 MVT VT = Op.getSimpleValueType(); 6161 if (!VT.isVector()) 6162 return Op; 6163 SDLoc DL(Op); 6164 bool IsStrict = Op->isStrictFPOpcode(); 6165 SDValue Src = Op.getOperand(0 + IsStrict); 6166 MVT EltVT = VT.getVectorElementType(); 6167 MVT SrcVT = Src.getSimpleValueType(); 6168 MVT SrcEltVT = SrcVT.getVectorElementType(); 6169 unsigned EltSize = EltVT.getSizeInBits(); 6170 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 6171 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 6172 "Unexpected vector element types"); 6173 6174 bool IsInt2FP = SrcEltVT.isInteger(); 6175 // Widening conversions 6176 if (EltSize > (2 * SrcEltSize)) { 6177 if (IsInt2FP) { 6178 // Do a regular integer sign/zero extension then convert to float. 6179 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), 6180 VT.getVectorElementCount()); 6181 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || 6182 Op.getOpcode() == ISD::STRICT_UINT_TO_FP) 6183 ? ISD::ZERO_EXTEND 6184 : ISD::SIGN_EXTEND; 6185 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 6186 if (IsStrict) 6187 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), 6188 Op.getOperand(0), Ext); 6189 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 6190 } 6191 // FP2Int 6192 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 6193 // Do one doubling fp_extend then complete the operation by converting 6194 // to int. 
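// For example, nxv2f16 -> nxv2i64 becomes nxv2f16 -> nxv2f32 (fp_extend)
// followed by nxv2f32 -> nxv2i64, a single doubling step the patterns handle.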
6195 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6196 if (IsStrict) { 6197 auto [FExt, Chain] = 6198 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT); 6199 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt); 6200 } 6201 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 6202 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 6203 } 6204 6205 // Narrowing conversions 6206 if (SrcEltSize > (2 * EltSize)) { 6207 if (IsInt2FP) { 6208 // One narrowing int_to_fp, then an fp_round. 6209 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 6210 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 6211 if (IsStrict) { 6212 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, 6213 DAG.getVTList(InterimFVT, MVT::Other), 6214 Op.getOperand(0), Src); 6215 SDValue Chain = Int2FP.getValue(1); 6216 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first; 6217 } 6218 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 6219 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 6220 } 6221 // FP2Int 6222 // One narrowing fp_to_int, then truncate the integer. If the float isn't 6223 // representable by the integer, the result is poison. 6224 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 6225 VT.getVectorElementCount()); 6226 if (IsStrict) { 6227 SDValue FP2Int = 6228 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), 6229 Op.getOperand(0), Src); 6230 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6231 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL); 6232 } 6233 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 6234 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 6235 } 6236 6237 // Scalable vectors can exit here. Patterns will handle equally-sized 6238 // conversions halving/doubling ones. 6239 if (!VT.isFixedLengthVector()) 6240 return Op; 6241 6242 // For fixed-length vectors we lower to a custom "VL" node. 
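// The source is converted into a scalable container type, the conversion is
// performed under a VL equal to the fixed vector's element count, and the
// result is extracted back out of the container.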
6243 unsigned RVVOpc = 0; 6244 switch (Op.getOpcode()) { 6245 default: 6246 llvm_unreachable("Impossible opcode"); 6247 case ISD::FP_TO_SINT: 6248 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; 6249 break; 6250 case ISD::FP_TO_UINT: 6251 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; 6252 break; 6253 case ISD::SINT_TO_FP: 6254 RVVOpc = RISCVISD::SINT_TO_FP_VL; 6255 break; 6256 case ISD::UINT_TO_FP: 6257 RVVOpc = RISCVISD::UINT_TO_FP_VL; 6258 break; 6259 case ISD::STRICT_FP_TO_SINT: 6260 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; 6261 break; 6262 case ISD::STRICT_FP_TO_UINT: 6263 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; 6264 break; 6265 case ISD::STRICT_SINT_TO_FP: 6266 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; 6267 break; 6268 case ISD::STRICT_UINT_TO_FP: 6269 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; 6270 break; 6271 } 6272 6273 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6274 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 6275 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && 6276 "Expected same element count"); 6277 6278 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6279 6280 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 6281 if (IsStrict) { 6282 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 6283 Op.getOperand(0), Src, Mask, VL); 6284 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget); 6285 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL); 6286 } 6287 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 6288 return convertFromScalableVector(VT, Src, DAG, Subtarget); 6289 } 6290 case ISD::FP_TO_SINT_SAT: 6291 case ISD::FP_TO_UINT_SAT: 6292 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); 6293 case ISD::FP_TO_BF16: { 6294 // Custom lower to ensure the libcall return is passed in an FPR on hard 6295 // float ABIs. 6296 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); 6297 SDLoc DL(Op); 6298 MakeLibCallOptions CallOptions; 6299 RTLIB::Libcall LC = 6300 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); 6301 SDValue Res = 6302 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6303 if (Subtarget.is64Bit() && !RV64LegalI32) 6304 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6305 return DAG.getBitcast(MVT::i32, Res); 6306 } 6307 case ISD::BF16_TO_FP: { 6308 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); 6309 MVT VT = Op.getSimpleValueType(); 6310 SDLoc DL(Op); 6311 Op = DAG.getNode( 6312 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), 6313 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); 6314 SDValue Res = Subtarget.is64Bit() 6315 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) 6316 : DAG.getBitcast(MVT::f32, Op); 6317 // fp_extend if the target VT is bigger than f32. 6318 if (VT != MVT::f32) 6319 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); 6320 return Res; 6321 } 6322 case ISD::FP_TO_FP16: { 6323 // Custom lower to ensure the libcall return is passed in an FPR on hard 6324 // float ABIs. 
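// This mirrors the FP_TO_BF16 lowering above: the narrowing libcall is made
// with an f32 return type so the result comes back in an FPR, and the bits
// are then moved into a GPR.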
6325 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6326 SDLoc DL(Op); 6327 MakeLibCallOptions CallOptions; 6328 RTLIB::Libcall LC = 6329 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); 6330 SDValue Res = 6331 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; 6332 if (Subtarget.is64Bit() && !RV64LegalI32) 6333 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); 6334 return DAG.getBitcast(MVT::i32, Res); 6335 } 6336 case ISD::FP16_TO_FP: { 6337 // Custom lower to ensure the libcall argument is passed in an FPR on hard 6338 // float ABIs. 6339 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); 6340 SDLoc DL(Op); 6341 MakeLibCallOptions CallOptions; 6342 SDValue Arg = Subtarget.is64Bit() 6343 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, 6344 Op.getOperand(0)) 6345 : DAG.getBitcast(MVT::f32, Op.getOperand(0)); 6346 SDValue Res = 6347 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) 6348 .first; 6349 return Res; 6350 } 6351 case ISD::FTRUNC: 6352 case ISD::FCEIL: 6353 case ISD::FFLOOR: 6354 case ISD::FNEARBYINT: 6355 case ISD::FRINT: 6356 case ISD::FROUND: 6357 case ISD::FROUNDEVEN: 6358 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6359 case ISD::LRINT: 6360 case ISD::LLRINT: 6361 return lowerVectorXRINT(Op, DAG, Subtarget); 6362 case ISD::VECREDUCE_ADD: 6363 case ISD::VECREDUCE_UMAX: 6364 case ISD::VECREDUCE_SMAX: 6365 case ISD::VECREDUCE_UMIN: 6366 case ISD::VECREDUCE_SMIN: 6367 return lowerVECREDUCE(Op, DAG); 6368 case ISD::VECREDUCE_AND: 6369 case ISD::VECREDUCE_OR: 6370 case ISD::VECREDUCE_XOR: 6371 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 6372 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 6373 return lowerVECREDUCE(Op, DAG); 6374 case ISD::VECREDUCE_FADD: 6375 case ISD::VECREDUCE_SEQ_FADD: 6376 case ISD::VECREDUCE_FMIN: 6377 case ISD::VECREDUCE_FMAX: 6378 return lowerFPVECREDUCE(Op, DAG); 6379 case ISD::VP_REDUCE_ADD: 6380 case ISD::VP_REDUCE_UMAX: 6381 case ISD::VP_REDUCE_SMAX: 6382 case ISD::VP_REDUCE_UMIN: 6383 case ISD::VP_REDUCE_SMIN: 6384 case ISD::VP_REDUCE_FADD: 6385 case ISD::VP_REDUCE_SEQ_FADD: 6386 case ISD::VP_REDUCE_FMIN: 6387 case ISD::VP_REDUCE_FMAX: 6388 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && 6389 (Subtarget.hasVInstructionsF16Minimal() && 6390 !Subtarget.hasVInstructionsF16())) 6391 return SplitVectorReductionOp(Op, DAG); 6392 return lowerVPREDUCE(Op, DAG); 6393 case ISD::VP_REDUCE_AND: 6394 case ISD::VP_REDUCE_OR: 6395 case ISD::VP_REDUCE_XOR: 6396 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 6397 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 6398 return lowerVPREDUCE(Op, DAG); 6399 case ISD::UNDEF: { 6400 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType()); 6401 return convertFromScalableVector(Op.getSimpleValueType(), 6402 DAG.getUNDEF(ContainerVT), DAG, Subtarget); 6403 } 6404 case ISD::INSERT_SUBVECTOR: 6405 return lowerINSERT_SUBVECTOR(Op, DAG); 6406 case ISD::EXTRACT_SUBVECTOR: 6407 return lowerEXTRACT_SUBVECTOR(Op, DAG); 6408 case ISD::VECTOR_DEINTERLEAVE: 6409 return lowerVECTOR_DEINTERLEAVE(Op, DAG); 6410 case ISD::VECTOR_INTERLEAVE: 6411 return lowerVECTOR_INTERLEAVE(Op, DAG); 6412 case ISD::STEP_VECTOR: 6413 return lowerSTEP_VECTOR(Op, DAG); 6414 case ISD::VECTOR_REVERSE: 6415 return lowerVECTOR_REVERSE(Op, DAG); 6416 case ISD::VECTOR_SPLICE: 6417 return lowerVECTOR_SPLICE(Op, DAG); 6418 
case ISD::BUILD_VECTOR: 6419 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 6420 case ISD::SPLAT_VECTOR: 6421 if (Op.getValueType().getScalarType() == MVT::f16 && 6422 (Subtarget.hasVInstructionsF16Minimal() && 6423 !Subtarget.hasVInstructionsF16())) { 6424 if (Op.getValueType() == MVT::nxv32f16) 6425 return SplitVectorOp(Op, DAG); 6426 SDLoc DL(Op); 6427 SDValue NewScalar = 6428 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 6429 SDValue NewSplat = DAG.getNode( 6430 ISD::SPLAT_VECTOR, DL, 6431 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), 6432 NewScalar); 6433 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat, 6434 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6435 } 6436 if (Op.getValueType().getVectorElementType() == MVT::i1) 6437 return lowerVectorMaskSplat(Op, DAG); 6438 return SDValue(); 6439 case ISD::VECTOR_SHUFFLE: 6440 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 6441 case ISD::CONCAT_VECTORS: { 6442 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 6443 // better than going through the stack, as the default expansion does. 6444 SDLoc DL(Op); 6445 MVT VT = Op.getSimpleValueType(); 6446 unsigned NumOpElts = 6447 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 6448 SDValue Vec = DAG.getUNDEF(VT); 6449 for (const auto &OpIdx : enumerate(Op->ops())) { 6450 SDValue SubVec = OpIdx.value(); 6451 // Don't insert undef subvectors. 6452 if (SubVec.isUndef()) 6453 continue; 6454 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, 6455 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 6456 } 6457 return Vec; 6458 } 6459 case ISD::LOAD: 6460 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 6461 return V; 6462 if (Op.getValueType().isFixedLengthVector()) 6463 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 6464 return Op; 6465 case ISD::STORE: 6466 if (auto V = expandUnalignedRVVStore(Op, DAG)) 6467 return V; 6468 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 6469 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 6470 return Op; 6471 case ISD::MLOAD: 6472 case ISD::VP_LOAD: 6473 return lowerMaskedLoad(Op, DAG); 6474 case ISD::MSTORE: 6475 case ISD::VP_STORE: 6476 return lowerMaskedStore(Op, DAG); 6477 case ISD::SELECT_CC: { 6478 // This occurs because we custom legalize SETGT and SETUGT for setcc. That 6479 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand 6480 // into separate SETCC+SELECT just like LegalizeDAG. 
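// (select_cc lhs, rhs, cc, t, f) -> (select (setcc lhs, rhs, cc), t, f)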
6481 SDValue Tmp1 = Op.getOperand(0); 6482 SDValue Tmp2 = Op.getOperand(1); 6483 SDValue True = Op.getOperand(2); 6484 SDValue False = Op.getOperand(3); 6485 EVT VT = Op.getValueType(); 6486 SDValue CC = Op.getOperand(4); 6487 EVT CmpVT = Tmp1.getValueType(); 6488 EVT CCVT = 6489 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); 6490 SDLoc DL(Op); 6491 SDValue Cond = 6492 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); 6493 return DAG.getSelect(DL, VT, Cond, True, False); 6494 } 6495 case ISD::SETCC: { 6496 MVT OpVT = Op.getOperand(0).getSimpleValueType(); 6497 if (OpVT.isScalarInteger()) { 6498 MVT VT = Op.getSimpleValueType(); 6499 SDValue LHS = Op.getOperand(0); 6500 SDValue RHS = Op.getOperand(1); 6501 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 6502 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && 6503 "Unexpected CondCode"); 6504 6505 SDLoc DL(Op); 6506 6507 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can 6508 // convert this to the equivalent of (set(u)ge X, C+1) by using 6509 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant 6510 // in a register. 6511 if (isa<ConstantSDNode>(RHS)) { 6512 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); 6513 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { 6514 // If this is an unsigned compare and the constant is -1, incrementing 6515 // the constant would change behavior. The result should be false. 6516 if (CCVal == ISD::SETUGT && Imm == -1) 6517 return DAG.getConstant(0, DL, VT); 6518 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. 6519 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6520 SDValue SetCC = DAG.getSetCC( 6521 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); 6522 return DAG.getLogicalNOT(DL, SetCC, VT); 6523 } 6524 } 6525 6526 // Not a constant we could handle, swap the operands and condition code to 6527 // SETLT/SETULT. 6528 CCVal = ISD::getSetCCSwappedOperands(CCVal); 6529 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); 6530 } 6531 6532 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 6533 (Subtarget.hasVInstructionsF16Minimal() && 6534 !Subtarget.hasVInstructionsF16())) 6535 return SplitVectorOp(Op, DAG); 6536 6537 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 6538 } 6539 case ISD::ADD: 6540 case ISD::SUB: 6541 case ISD::MUL: 6542 case ISD::MULHS: 6543 case ISD::MULHU: 6544 case ISD::AND: 6545 case ISD::OR: 6546 case ISD::XOR: 6547 case ISD::SDIV: 6548 case ISD::SREM: 6549 case ISD::UDIV: 6550 case ISD::UREM: 6551 case ISD::BSWAP: 6552 case ISD::CTPOP: 6553 return lowerToScalableOp(Op, DAG); 6554 case ISD::SHL: 6555 case ISD::SRA: 6556 case ISD::SRL: 6557 if (Op.getSimpleValueType().isFixedLengthVector()) 6558 return lowerToScalableOp(Op, DAG); 6559 // This can be called for an i32 shift amount that needs to be promoted. 
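// Returning SDValue() falls back to the default handling, which promotes the
// i32 shift amount.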
6560 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 6561 "Unexpected custom legalisation"); 6562 return SDValue(); 6563 case ISD::FADD: 6564 case ISD::FSUB: 6565 case ISD::FMUL: 6566 case ISD::FDIV: 6567 case ISD::FNEG: 6568 case ISD::FABS: 6569 case ISD::FSQRT: 6570 case ISD::FMA: 6571 case ISD::FMINNUM: 6572 case ISD::FMAXNUM: 6573 if (Op.getValueType() == MVT::nxv32f16 && 6574 (Subtarget.hasVInstructionsF16Minimal() && 6575 !Subtarget.hasVInstructionsF16())) 6576 return SplitVectorOp(Op, DAG); 6577 [[fallthrough]]; 6578 case ISD::AVGFLOORU: 6579 case ISD::AVGCEILU: 6580 case ISD::SADDSAT: 6581 case ISD::UADDSAT: 6582 case ISD::SSUBSAT: 6583 case ISD::USUBSAT: 6584 case ISD::SMIN: 6585 case ISD::SMAX: 6586 case ISD::UMIN: 6587 case ISD::UMAX: 6588 return lowerToScalableOp(Op, DAG); 6589 case ISD::ABS: 6590 case ISD::VP_ABS: 6591 return lowerABS(Op, DAG); 6592 case ISD::CTLZ: 6593 case ISD::CTLZ_ZERO_UNDEF: 6594 case ISD::CTTZ: 6595 case ISD::CTTZ_ZERO_UNDEF: 6596 if (Subtarget.hasStdExtZvbb()) 6597 return lowerToScalableOp(Op, DAG); 6598 assert(Op.getOpcode() != ISD::CTTZ); 6599 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6600 case ISD::VSELECT: 6601 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 6602 case ISD::FCOPYSIGN: 6603 if (Op.getValueType() == MVT::nxv32f16 && 6604 (Subtarget.hasVInstructionsF16Minimal() && 6605 !Subtarget.hasVInstructionsF16())) 6606 return SplitVectorOp(Op, DAG); 6607 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 6608 case ISD::STRICT_FADD: 6609 case ISD::STRICT_FSUB: 6610 case ISD::STRICT_FMUL: 6611 case ISD::STRICT_FDIV: 6612 case ISD::STRICT_FSQRT: 6613 case ISD::STRICT_FMA: 6614 if (Op.getValueType() == MVT::nxv32f16 && 6615 (Subtarget.hasVInstructionsF16Minimal() && 6616 !Subtarget.hasVInstructionsF16())) 6617 return SplitStrictFPVectorOp(Op, DAG); 6618 return lowerToScalableOp(Op, DAG); 6619 case ISD::STRICT_FSETCC: 6620 case ISD::STRICT_FSETCCS: 6621 return lowerVectorStrictFSetcc(Op, DAG); 6622 case ISD::STRICT_FCEIL: 6623 case ISD::STRICT_FRINT: 6624 case ISD::STRICT_FFLOOR: 6625 case ISD::STRICT_FTRUNC: 6626 case ISD::STRICT_FNEARBYINT: 6627 case ISD::STRICT_FROUND: 6628 case ISD::STRICT_FROUNDEVEN: 6629 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6630 case ISD::MGATHER: 6631 case ISD::VP_GATHER: 6632 return lowerMaskedGather(Op, DAG); 6633 case ISD::MSCATTER: 6634 case ISD::VP_SCATTER: 6635 return lowerMaskedScatter(Op, DAG); 6636 case ISD::GET_ROUNDING: 6637 return lowerGET_ROUNDING(Op, DAG); 6638 case ISD::SET_ROUNDING: 6639 return lowerSET_ROUNDING(Op, DAG); 6640 case ISD::EH_DWARF_CFA: 6641 return lowerEH_DWARF_CFA(Op, DAG); 6642 case ISD::VP_SELECT: 6643 case ISD::VP_MERGE: 6644 case ISD::VP_ADD: 6645 case ISD::VP_SUB: 6646 case ISD::VP_MUL: 6647 case ISD::VP_SDIV: 6648 case ISD::VP_UDIV: 6649 case ISD::VP_SREM: 6650 case ISD::VP_UREM: 6651 return lowerVPOp(Op, DAG); 6652 case ISD::VP_AND: 6653 case ISD::VP_OR: 6654 case ISD::VP_XOR: 6655 return lowerLogicVPOp(Op, DAG); 6656 case ISD::VP_FADD: 6657 case ISD::VP_FSUB: 6658 case ISD::VP_FMUL: 6659 case ISD::VP_FDIV: 6660 case ISD::VP_FNEG: 6661 case ISD::VP_FABS: 6662 case ISD::VP_SQRT: 6663 case ISD::VP_FMA: 6664 case ISD::VP_FMINNUM: 6665 case ISD::VP_FMAXNUM: 6666 case ISD::VP_FCOPYSIGN: 6667 if (Op.getValueType() == MVT::nxv32f16 && 6668 (Subtarget.hasVInstructionsF16Minimal() && 6669 !Subtarget.hasVInstructionsF16())) 6670 return SplitVPOp(Op, DAG); 6671 [[fallthrough]]; 6672 case ISD::VP_ASHR: 6673 case ISD::VP_LSHR: 6674 
case ISD::VP_SHL: 6675 return lowerVPOp(Op, DAG); 6676 case ISD::VP_IS_FPCLASS: 6677 return LowerIS_FPCLASS(Op, DAG); 6678 case ISD::VP_SIGN_EXTEND: 6679 case ISD::VP_ZERO_EXTEND: 6680 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 6681 return lowerVPExtMaskOp(Op, DAG); 6682 return lowerVPOp(Op, DAG); 6683 case ISD::VP_TRUNCATE: 6684 return lowerVectorTruncLike(Op, DAG); 6685 case ISD::VP_FP_EXTEND: 6686 case ISD::VP_FP_ROUND: 6687 return lowerVectorFPExtendOrRoundLike(Op, DAG); 6688 case ISD::VP_SINT_TO_FP: 6689 case ISD::VP_UINT_TO_FP: 6690 if (Op.getValueType().isVector() && 6691 Op.getValueType().getScalarType() == MVT::f16 && 6692 (Subtarget.hasVInstructionsF16Minimal() && 6693 !Subtarget.hasVInstructionsF16())) { 6694 if (Op.getValueType() == MVT::nxv32f16) 6695 return SplitVPOp(Op, DAG); 6696 // int -> f32 6697 SDLoc DL(Op); 6698 MVT NVT = 6699 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); 6700 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); 6701 // f32 -> f16 6702 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, 6703 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 6704 } 6705 [[fallthrough]]; 6706 case ISD::VP_FP_TO_SINT: 6707 case ISD::VP_FP_TO_UINT: 6708 if (SDValue Op1 = Op.getOperand(0); 6709 Op1.getValueType().isVector() && 6710 Op1.getValueType().getScalarType() == MVT::f16 && 6711 (Subtarget.hasVInstructionsF16Minimal() && 6712 !Subtarget.hasVInstructionsF16())) { 6713 if (Op1.getValueType() == MVT::nxv32f16) 6714 return SplitVPOp(Op, DAG); 6715 // f16 -> f32 6716 SDLoc DL(Op); 6717 MVT NVT = MVT::getVectorVT(MVT::f32, 6718 Op1.getValueType().getVectorElementCount()); 6719 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); 6720 // f32 -> int 6721 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 6722 {WidenVec, Op.getOperand(1), Op.getOperand(2)}); 6723 } 6724 return lowerVPFPIntConvOp(Op, DAG); 6725 case ISD::VP_SETCC: 6726 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && 6727 (Subtarget.hasVInstructionsF16Minimal() && 6728 !Subtarget.hasVInstructionsF16())) 6729 return SplitVPOp(Op, DAG); 6730 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 6731 return lowerVPSetCCMaskOp(Op, DAG); 6732 [[fallthrough]]; 6733 case ISD::VP_SMIN: 6734 case ISD::VP_SMAX: 6735 case ISD::VP_UMIN: 6736 case ISD::VP_UMAX: 6737 case ISD::VP_BITREVERSE: 6738 case ISD::VP_BSWAP: 6739 return lowerVPOp(Op, DAG); 6740 case ISD::VP_CTLZ: 6741 case ISD::VP_CTLZ_ZERO_UNDEF: 6742 if (Subtarget.hasStdExtZvbb()) 6743 return lowerVPOp(Op, DAG); 6744 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6745 case ISD::VP_CTTZ: 6746 case ISD::VP_CTTZ_ZERO_UNDEF: 6747 if (Subtarget.hasStdExtZvbb()) 6748 return lowerVPOp(Op, DAG); 6749 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 6750 case ISD::VP_CTPOP: 6751 return lowerVPOp(Op, DAG); 6752 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 6753 return lowerVPStridedLoad(Op, DAG); 6754 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 6755 return lowerVPStridedStore(Op, DAG); 6756 case ISD::VP_FCEIL: 6757 case ISD::VP_FFLOOR: 6758 case ISD::VP_FRINT: 6759 case ISD::VP_FNEARBYINT: 6760 case ISD::VP_FROUND: 6761 case ISD::VP_FROUNDEVEN: 6762 case ISD::VP_FROUNDTOZERO: 6763 if (Op.getValueType() == MVT::nxv32f16 && 6764 (Subtarget.hasVInstructionsF16Minimal() && 6765 !Subtarget.hasVInstructionsF16())) 6766 return SplitVPOp(Op, DAG); 6767 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 6768 case ISD::VP_FMAXIMUM: 6769 case ISD::VP_FMINIMUM: 6770 
if (Op.getValueType() == MVT::nxv32f16 && 6771 (Subtarget.hasVInstructionsF16Minimal() && 6772 !Subtarget.hasVInstructionsF16())) 6773 return SplitVPOp(Op, DAG); 6774 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); 6775 case ISD::EXPERIMENTAL_VP_SPLICE: 6776 return lowerVPSpliceExperimental(Op, DAG); 6777 case ISD::EXPERIMENTAL_VP_REVERSE: 6778 return lowerVPReverseExperimental(Op, DAG); 6779 } 6780} 6781 6782static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, 6783 SelectionDAG &DAG, unsigned Flags) { 6784 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 6785} 6786 6787static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, 6788 SelectionDAG &DAG, unsigned Flags) { 6789 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 6790 Flags); 6791} 6792 6793static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, 6794 SelectionDAG &DAG, unsigned Flags) { 6795 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 6796 N->getOffset(), Flags); 6797} 6798 6799static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, 6800 SelectionDAG &DAG, unsigned Flags) { 6801 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 6802} 6803 6804template <class NodeTy> 6805SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 6806 bool IsLocal, bool IsExternWeak) const { 6807 SDLoc DL(N); 6808 EVT Ty = getPointerTy(DAG.getDataLayout()); 6809 6810 // When HWASAN is used and tagging of global variables is enabled 6811 // they should be accessed via the GOT, since the tagged address of a global 6812 // is incompatible with existing code models. This also applies to non-pic 6813 // mode. 6814 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { 6815 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 6816 if (IsLocal && !Subtarget.allowTaggedGlobals()) 6817 // Use PC-relative addressing to access the symbol. This generates the 6818 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 6819 // %pcrel_lo(auipc)). 6820 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 6821 6822 // Use PC-relative addressing to access the GOT for this symbol, then load 6823 // the address from the GOT. This generates the pattern (PseudoLGA sym), 6824 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 6825 SDValue Load = 6826 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 6827 MachineFunction &MF = DAG.getMachineFunction(); 6828 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6829 MachinePointerInfo::getGOT(MF), 6830 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6831 MachineMemOperand::MOInvariant, 6832 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6833 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6834 return Load; 6835 } 6836 6837 switch (getTargetMachine().getCodeModel()) { 6838 default: 6839 report_fatal_error("Unsupported code model for lowering"); 6840 case CodeModel::Small: { 6841 // Generate a sequence for accessing addresses within the first 2 GiB of 6842 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
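// In assembly (with rd a scratch register), roughly:
//   lui rd, %hi(sym); addi rd, rd, %lo(sym)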
6843 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 6844 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 6845 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 6846 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); 6847 } 6848 case CodeModel::Medium: { 6849 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 6850 if (IsExternWeak) { 6851 // An extern weak symbol may be undefined, i.e. have value 0, which may 6852 // not be within 2GiB of PC, so use GOT-indirect addressing to access the 6853 // symbol. This generates the pattern (PseudoLGA sym), which expands to 6854 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 6855 SDValue Load = 6856 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); 6857 MachineFunction &MF = DAG.getMachineFunction(); 6858 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6859 MachinePointerInfo::getGOT(MF), 6860 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6861 MachineMemOperand::MOInvariant, 6862 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6863 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6864 return Load; 6865 } 6866 6867 // Generate a sequence for accessing addresses within any 2GiB range within 6868 // the address space. This generates the pattern (PseudoLLA sym), which 6869 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 6870 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 6871 } 6872 } 6873} 6874 6875SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 6876 SelectionDAG &DAG) const { 6877 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 6878 assert(N->getOffset() == 0 && "unexpected offset in global node"); 6879 const GlobalValue *GV = N->getGlobal(); 6880 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage()); 6881} 6882 6883SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 6884 SelectionDAG &DAG) const { 6885 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 6886 6887 return getAddr(N, DAG); 6888} 6889 6890SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 6891 SelectionDAG &DAG) const { 6892 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 6893 6894 return getAddr(N, DAG); 6895} 6896 6897SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 6898 SelectionDAG &DAG) const { 6899 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 6900 6901 return getAddr(N, DAG); 6902} 6903 6904SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 6905 SelectionDAG &DAG, 6906 bool UseGOT) const { 6907 SDLoc DL(N); 6908 EVT Ty = getPointerTy(DAG.getDataLayout()); 6909 const GlobalValue *GV = N->getGlobal(); 6910 MVT XLenVT = Subtarget.getXLenVT(); 6911 6912 if (UseGOT) { 6913 // Use PC-relative addressing to access the GOT for this TLS symbol, then 6914 // load the address from the GOT and add the thread pointer. This generates 6915 // the pattern (PseudoLA_TLS_IE sym), which expands to 6916 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 
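// In assembly this is roughly (lw instead of ld on RV32):
//   auipc rd, %tls_ie_pcrel_hi(sym); ld rd, %pcrel_lo(label)(rd); add rd, rd, tp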
6917 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6918 SDValue Load = 6919 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 6920 MachineFunction &MF = DAG.getMachineFunction(); 6921 MachineMemOperand *MemOp = MF.getMachineMemOperand( 6922 MachinePointerInfo::getGOT(MF), 6923 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 6924 MachineMemOperand::MOInvariant, 6925 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 6926 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 6927 6928 // Add the thread pointer. 6929 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 6930 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 6931 } 6932 6933 // Generate a sequence for accessing the address relative to the thread 6934 // pointer, with the appropriate adjustment for the thread pointer offset. 6935 // This generates the pattern 6936 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 6937 SDValue AddrHi = 6938 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 6939 SDValue AddrAdd = 6940 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 6941 SDValue AddrLo = 6942 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 6943 6944 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 6945 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 6946 SDValue MNAdd = 6947 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); 6948 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); 6949} 6950 6951SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 6952 SelectionDAG &DAG) const { 6953 SDLoc DL(N); 6954 EVT Ty = getPointerTy(DAG.getDataLayout()); 6955 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 6956 const GlobalValue *GV = N->getGlobal(); 6957 6958 // Use a PC-relative addressing mode to access the global dynamic GOT address. 6959 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 6960 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 6961 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6962 SDValue Load = 6963 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 6964 6965 // Prepare argument list to generate call. 6966 ArgListTy Args; 6967 ArgListEntry Entry; 6968 Entry.Node = Load; 6969 Entry.Ty = CallTy; 6970 Args.push_back(Entry); 6971 6972 // Setup call to __tls_get_addr. 6973 TargetLowering::CallLoweringInfo CLI(DAG); 6974 CLI.setDebugLoc(DL) 6975 .setChain(DAG.getEntryNode()) 6976 .setLibCallee(CallingConv::C, CallTy, 6977 DAG.getExternalSymbol("__tls_get_addr", Ty), 6978 std::move(Args)); 6979 6980 return LowerCallTo(CLI).first; 6981} 6982 6983SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, 6984 SelectionDAG &DAG) const { 6985 SDLoc DL(N); 6986 EVT Ty = getPointerTy(DAG.getDataLayout()); 6987 const GlobalValue *GV = N->getGlobal(); 6988 6989 // Use a PC-relative addressing mode to access the global dynamic GOT address. 
6990 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to 6991 // 6992 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) 6993 // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label) 6994 // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label) 6995 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) 6996 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 6997 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); 6998} 6999 7000SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 7001 SelectionDAG &DAG) const { 7002 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 7003 assert(N->getOffset() == 0 && "unexpected offset in global node"); 7004 7005 if (DAG.getTarget().useEmulatedTLS()) 7006 return LowerToTLSEmulatedModel(N, DAG); 7007 7008 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 7009 7010 if (DAG.getMachineFunction().getFunction().getCallingConv() == 7011 CallingConv::GHC) 7012 report_fatal_error("In GHC calling convention TLS is not supported"); 7013 7014 SDValue Addr; 7015 switch (Model) { 7016 case TLSModel::LocalExec: 7017 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 7018 break; 7019 case TLSModel::InitialExec: 7020 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 7021 break; 7022 case TLSModel::LocalDynamic: 7023 case TLSModel::GeneralDynamic: 7024 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) 7025 : getDynamicTLSAddr(N, DAG); 7026 break; 7027 } 7028 7029 return Addr; 7030} 7031 7032// Return true if Val is equal to (setcc LHS, RHS, CC). 7033// Return false if Val is the inverse of (setcc LHS, RHS, CC). 7034// Otherwise, return std::nullopt. 7035static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, 7036 ISD::CondCode CC, SDValue Val) { 7037 assert(Val->getOpcode() == ISD::SETCC); 7038 SDValue LHS2 = Val.getOperand(0); 7039 SDValue RHS2 = Val.getOperand(1); 7040 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get(); 7041 7042 if (LHS == LHS2 && RHS == RHS2) { 7043 if (CC == CC2) 7044 return true; 7045 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7046 return false; 7047 } else if (LHS == RHS2 && RHS == LHS2) { 7048 CC2 = ISD::getSetCCSwappedOperands(CC2); 7049 if (CC == CC2) 7050 return true; 7051 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) 7052 return false; 7053 } 7054 7055 return std::nullopt; 7056} 7057 7058static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, 7059 const RISCVSubtarget &Subtarget) { 7060 SDValue CondV = N->getOperand(0); 7061 SDValue TrueV = N->getOperand(1); 7062 SDValue FalseV = N->getOperand(2); 7063 MVT VT = N->getSimpleValueType(0); 7064 SDLoc DL(N); 7065 7066 if (!Subtarget.hasConditionalMoveFusion()) { 7067 // (select c, -1, y) -> -c | y 7068 if (isAllOnesConstant(TrueV)) { 7069 SDValue Neg = DAG.getNegative(CondV, DL, VT); 7070 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 7071 } 7072 // (select c, y, -1) -> (c-1) | y 7073 if (isAllOnesConstant(FalseV)) { 7074 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7075 DAG.getAllOnesConstant(DL, VT)); 7076 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 7077 } 7078 7079 // (select c, 0, y) -> (c-1) & y 7080 if (isNullConstant(TrueV)) { 7081 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 7082 DAG.getAllOnesConstant(DL, VT)); 7083 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 7084 } 7085 // (select c, y, 0) -> -c & y 7086 if (isNullConstant(FalseV)) { 7087 SDValue Neg = 
DAG.getNegative(CondV, DL, VT); 7088 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 7089 } 7090 } 7091 7092 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops 7093 // when both truev and falsev are also setcc. 7094 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && 7095 FalseV.getOpcode() == ISD::SETCC) { 7096 SDValue LHS = CondV.getOperand(0); 7097 SDValue RHS = CondV.getOperand(1); 7098 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7099 7100 // (select x, x, y) -> x | y 7101 // (select !x, x, y) -> x & y 7102 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { 7103 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, 7104 FalseV); 7105 } 7106 // (select x, y, x) -> x & y 7107 // (select !x, y, x) -> x | y 7108 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { 7109 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, 7110 FalseV); 7111 } 7112 } 7113 7114 return SDValue(); 7115} 7116 7117// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants 7118// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. 7119// For now we only consider transformation profitable if `binOp(c0, c1)` ends up 7120// being `0` or `-1`. In such cases we can replace `select` with `and`. 7121// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize 7122// than `c0`? 7123static SDValue 7124foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, 7125 const RISCVSubtarget &Subtarget) { 7126 if (Subtarget.hasShortForwardBranchOpt()) 7127 return SDValue(); 7128 7129 unsigned SelOpNo = 0; 7130 SDValue Sel = BO->getOperand(0); 7131 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { 7132 SelOpNo = 1; 7133 Sel = BO->getOperand(1); 7134 } 7135 7136 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) 7137 return SDValue(); 7138 7139 unsigned ConstSelOpNo = 1; 7140 unsigned OtherSelOpNo = 2; 7141 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) { 7142 ConstSelOpNo = 2; 7143 OtherSelOpNo = 1; 7144 } 7145 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo); 7146 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp); 7147 if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) 7148 return SDValue(); 7149 7150 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1); 7151 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp); 7152 if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) 7153 return SDValue(); 7154 7155 SDLoc DL(Sel); 7156 EVT VT = BO->getValueType(0); 7157 7158 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; 7159 if (SelOpNo == 1) 7160 std::swap(NewConstOps[0], NewConstOps[1]); 7161 7162 SDValue NewConstOp = 7163 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps); 7164 if (!NewConstOp) 7165 return SDValue(); 7166 7167 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); 7168 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) 7169 return SDValue(); 7170 7171 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo); 7172 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; 7173 if (SelOpNo == 1) 7174 std::swap(NewNonConstOps[0], NewNonConstOps[1]); 7175 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps); 7176 7177 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; 7178 SDValue NewF = (ConstSelOpNo == 1) ? 
NewNonConstOp : NewConstOp; 7179 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF); 7180} 7181 7182SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 7183 SDValue CondV = Op.getOperand(0); 7184 SDValue TrueV = Op.getOperand(1); 7185 SDValue FalseV = Op.getOperand(2); 7186 SDLoc DL(Op); 7187 MVT VT = Op.getSimpleValueType(); 7188 MVT XLenVT = Subtarget.getXLenVT(); 7189 7190 // Lower vector SELECTs to VSELECTs by splatting the condition. 7191 if (VT.isVector()) { 7192 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 7193 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); 7194 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 7195 } 7196 7197 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ 7198 // nodes to implement the SELECT. Performing the lowering here allows for 7199 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless 7200 // sequence or RISCVISD::SELECT_CC node (branch-based select). 7201 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && 7202 VT.isScalarInteger()) { 7203 // (select c, t, 0) -> (czero_eqz t, c) 7204 if (isNullConstant(FalseV)) 7205 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); 7206 // (select c, 0, f) -> (czero_nez f, c) 7207 if (isNullConstant(TrueV)) 7208 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV); 7209 7210 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) 7211 if (TrueV.getOpcode() == ISD::AND && 7212 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) 7213 return DAG.getNode( 7214 ISD::OR, DL, VT, TrueV, 7215 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7216 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) 7217 if (FalseV.getOpcode() == ISD::AND && 7218 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) 7219 return DAG.getNode( 7220 ISD::OR, DL, VT, FalseV, 7221 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); 7222 7223 // Try some other optimizations before falling back to generic lowering. 7224 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7225 return V; 7226 7227 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) 7228 // Unless we have the short forward branch optimization. 
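// CZERO_EQZ yields TrueV when CondV is non-zero and 0 otherwise, while
// CZERO_NEZ does the opposite for FalseV, so the OR keeps exactly one of the
// two values.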
7229 if (!Subtarget.hasConditionalMoveFusion()) 7230 return DAG.getNode( 7231 ISD::OR, DL, VT, 7232 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), 7233 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); 7234 } 7235 7236 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) 7237 return V; 7238 7239 if (Op.hasOneUse()) { 7240 unsigned UseOpc = Op->use_begin()->getOpcode(); 7241 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { 7242 SDNode *BinOp = *Op->use_begin(); 7243 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(), 7244 DAG, Subtarget)) { 7245 DAG.ReplaceAllUsesWith(BinOp, &NewSel); 7246 return lowerSELECT(NewSel, DAG); 7247 } 7248 } 7249 } 7250 7251 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) 7252 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) 7253 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV); 7254 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV); 7255 if (FPTV && FPFV) { 7256 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0)) 7257 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV); 7258 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) { 7259 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV, 7260 DAG.getConstant(1, DL, XLenVT)); 7261 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR); 7262 } 7263 } 7264 7265 // If the condition is not an integer SETCC which operates on XLenVT, we need 7266 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: 7267 // (select condv, truev, falsev) 7268 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 7269 if (CondV.getOpcode() != ISD::SETCC || 7270 CondV.getOperand(0).getSimpleValueType() != XLenVT) { 7271 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 7272 SDValue SetNE = DAG.getCondCode(ISD::SETNE); 7273 7274 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 7275 7276 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 7277 } 7278 7279 // If the CondV is the output of a SETCC node which operates on XLenVT inputs, 7280 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take 7281 // advantage of the integer compare+branch instructions. i.e.: 7282 // (select (setcc lhs, rhs, cc), truev, falsev) 7283 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 7284 SDValue LHS = CondV.getOperand(0); 7285 SDValue RHS = CondV.getOperand(1); 7286 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7287 7288 // Special case for a select of 2 constants that have a difference of 1. 7289 // Normally this is done by DAGCombine, but if the select is introduced by 7290 // type legalization or op legalization, we miss it. Restricting to SETLT 7291 // case for now because that is what signed saturating add/sub need. 7292 // FIXME: We don't need the condition to be SETLT or even a SETCC, 7293 // but we would probably want to swap the true/false values if the condition 7294 // is SETGE/SETLE to avoid an XORI. 7295 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && 7296 CCVal == ISD::SETLT) { 7297 const APInt &TrueVal = TrueV->getAsAPIntVal(); 7298 const APInt &FalseVal = FalseV->getAsAPIntVal(); 7299 if (TrueVal - 1 == FalseVal) 7300 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); 7301 if (TrueVal + 1 == FalseVal) 7302 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV); 7303 } 7304 7305 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7306 // 1 < x ? x : 1 -> 0 < x ?
x : 1 7307 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && 7308 RHS == TrueV && LHS == FalseV) { 7309 LHS = DAG.getConstant(0, DL, VT); 7310 // 0 <u x is the same as x != 0. 7311 if (CCVal == ISD::SETULT) { 7312 std::swap(LHS, RHS); 7313 CCVal = ISD::SETNE; 7314 } 7315 } 7316 7317 // x <s -1 ? x : -1 -> x <s 0 ? x : -1 7318 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && 7319 RHS == FalseV) { 7320 RHS = DAG.getConstant(0, DL, VT); 7321 } 7322 7323 SDValue TargetCC = DAG.getCondCode(CCVal); 7324 7325 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { 7326 // (select (setcc lhs, rhs, CC), constant, falsev) 7327 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) 7328 std::swap(TrueV, FalseV); 7329 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); 7330 } 7331 7332 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 7333 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 7334} 7335 7336SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 7337 SDValue CondV = Op.getOperand(1); 7338 SDLoc DL(Op); 7339 MVT XLenVT = Subtarget.getXLenVT(); 7340 7341 if (CondV.getOpcode() == ISD::SETCC && 7342 CondV.getOperand(0).getValueType() == XLenVT) { 7343 SDValue LHS = CondV.getOperand(0); 7344 SDValue RHS = CondV.getOperand(1); 7345 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 7346 7347 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7348 7349 SDValue TargetCC = DAG.getCondCode(CCVal); 7350 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7351 LHS, RHS, TargetCC, Op.getOperand(2)); 7352 } 7353 7354 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 7355 CondV, DAG.getConstant(0, DL, XLenVT), 7356 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 7357} 7358 7359SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 7360 MachineFunction &MF = DAG.getMachineFunction(); 7361 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 7362 7363 SDLoc DL(Op); 7364 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 7365 getPointerTy(MF.getDataLayout())); 7366 7367 // vastart just stores the address of the VarArgsFrameIndex slot into the 7368 // memory location argument. 
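// The RISC-V va_list is a single pointer, so storing the frame index address
// is the only initialization required.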
7369 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 7370 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 7371 MachinePointerInfo(SV)); 7372} 7373 7374SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 7375 SelectionDAG &DAG) const { 7376 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7377 MachineFunction &MF = DAG.getMachineFunction(); 7378 MachineFrameInfo &MFI = MF.getFrameInfo(); 7379 MFI.setFrameAddressIsTaken(true); 7380 Register FrameReg = RI.getFrameRegister(MF); 7381 int XLenInBytes = Subtarget.getXLen() / 8; 7382 7383 EVT VT = Op.getValueType(); 7384 SDLoc DL(Op); 7385 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 7386 unsigned Depth = Op.getConstantOperandVal(0); 7387 while (Depth--) { 7388 int Offset = -(XLenInBytes * 2); 7389 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 7390 DAG.getIntPtrConstant(Offset, DL)); 7391 FrameAddr = 7392 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 7393 } 7394 return FrameAddr; 7395} 7396 7397SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 7398 SelectionDAG &DAG) const { 7399 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 7400 MachineFunction &MF = DAG.getMachineFunction(); 7401 MachineFrameInfo &MFI = MF.getFrameInfo(); 7402 MFI.setReturnAddressIsTaken(true); 7403 MVT XLenVT = Subtarget.getXLenVT(); 7404 int XLenInBytes = Subtarget.getXLen() / 8; 7405 7406 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 7407 return SDValue(); 7408 7409 EVT VT = Op.getValueType(); 7410 SDLoc DL(Op); 7411 unsigned Depth = Op.getConstantOperandVal(0); 7412 if (Depth) { 7413 int Off = -XLenInBytes; 7414 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 7415 SDValue Offset = DAG.getConstant(Off, DL, VT); 7416 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 7417 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 7418 MachinePointerInfo()); 7419 } 7420 7421 // Return the value of the return address register, marking it an implicit 7422 // live-in. 
7423 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 7424 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 7425} 7426 7427SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 7428 SelectionDAG &DAG) const { 7429 SDLoc DL(Op); 7430 SDValue Lo = Op.getOperand(0); 7431 SDValue Hi = Op.getOperand(1); 7432 SDValue Shamt = Op.getOperand(2); 7433 EVT VT = Lo.getValueType(); 7434 7435 // if Shamt-XLEN < 0: // Shamt < XLEN 7436 // Lo = Lo << Shamt 7437 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 7438 // else: 7439 // Lo = 0 7440 // Hi = Lo << (Shamt-XLEN) 7441 7442 SDValue Zero = DAG.getConstant(0, DL, VT); 7443 SDValue One = DAG.getConstant(1, DL, VT); 7444 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 7445 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 7446 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 7447 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 7448 7449 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 7450 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 7451 SDValue ShiftRightLo = 7452 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 7453 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 7454 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 7455 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 7456 7457 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 7458 7459 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 7460 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 7461 7462 SDValue Parts[2] = {Lo, Hi}; 7463 return DAG.getMergeValues(Parts, DL); 7464} 7465 7466SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 7467 bool IsSRA) const { 7468 SDLoc DL(Op); 7469 SDValue Lo = Op.getOperand(0); 7470 SDValue Hi = Op.getOperand(1); 7471 SDValue Shamt = Op.getOperand(2); 7472 EVT VT = Lo.getValueType(); 7473 7474 // SRA expansion: 7475 // if Shamt-XLEN < 0: // Shamt < XLEN 7476 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 7477 // Hi = Hi >>s Shamt 7478 // else: 7479 // Lo = Hi >>s (Shamt-XLEN); 7480 // Hi = Hi >>s (XLEN-1) 7481 // 7482 // SRL expansion: 7483 // if Shamt-XLEN < 0: // Shamt < XLEN 7484 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) 7485 // Hi = Hi >>u Shamt 7486 // else: 7487 // Lo = Hi >>u (Shamt-XLEN); 7488 // Hi = 0; 7489 7490 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 7491 7492 SDValue Zero = DAG.getConstant(0, DL, VT); 7493 SDValue One = DAG.getConstant(1, DL, VT); 7494 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 7495 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 7496 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 7497 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 7498 7499 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 7500 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 7501 SDValue ShiftLeftHi = 7502 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 7503 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 7504 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 7505 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 7506 SDValue HiFalse = 7507 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 7508 7509 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 7510 7511 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 7512 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 7513 7514 SDValue Parts[2] = {Lo, Hi}; 7515 return DAG.getMergeValues(Parts, DL); 7516} 7517 7518// Lower splats of i1 types to SETCC. For each mask vector type, we have a 7519// legal equivalently-sized i8 type, so we can use that as a go-between. 7520SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 7521 SelectionDAG &DAG) const { 7522 SDLoc DL(Op); 7523 MVT VT = Op.getSimpleValueType(); 7524 SDValue SplatVal = Op.getOperand(0); 7525 // All-zeros or all-ones splats are handled specially. 7526 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 7527 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 7528 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 7529 } 7530 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 7531 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 7532 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 7533 } 7534 MVT InterVT = VT.changeVectorElementType(MVT::i8); 7535 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal, 7536 DAG.getConstant(1, DL, SplatVal.getValueType())); 7537 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 7538 SDValue Zero = DAG.getConstant(0, DL, InterVT); 7539 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 7540} 7541 7542// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 7543// illegal (currently only vXi64 RV32). 7544// FIXME: We could also catch non-constant sign-extended i32 values and lower 7545// them to VMV_V_X_VL. 7546SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 7547 SelectionDAG &DAG) const { 7548 SDLoc DL(Op); 7549 MVT VecVT = Op.getSimpleValueType(); 7550 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 7551 "Unexpected SPLAT_VECTOR_PARTS lowering"); 7552 7553 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 7554 SDValue Lo = Op.getOperand(0); 7555 SDValue Hi = Op.getOperand(1); 7556 7557 MVT ContainerVT = VecVT; 7558 if (VecVT.isFixedLengthVector()) 7559 ContainerVT = getContainerForFixedLengthVector(VecVT); 7560 7561 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 7562 7563 SDValue Res = 7564 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); 7565 7566 if (VecVT.isFixedLengthVector()) 7567 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget); 7568 7569 return Res; 7570} 7571 7572// Custom-lower extensions from mask vectors by using a vselect either with 1 7573// for zero/any-extension or -1 for sign-extension: 7574// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 7575// Note that any-extension is lowered identically to zero-extension. 
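// For example (illustrative): sign-extending an nxv4i1 mask to nxv4i32 becomes
// (vselect mask, splat(-1), splat(0)), while zero- or any-extension uses
// splat(1) as the true value.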
7576SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 7577 int64_t ExtTrueVal) const { 7578 SDLoc DL(Op); 7579 MVT VecVT = Op.getSimpleValueType(); 7580 SDValue Src = Op.getOperand(0); 7581 // Only custom-lower extensions from mask types 7582 assert(Src.getValueType().isVector() && 7583 Src.getValueType().getVectorElementType() == MVT::i1); 7584 7585 if (VecVT.isScalableVector()) { 7586 SDValue SplatZero = DAG.getConstant(0, DL, VecVT); 7587 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); 7588 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 7589 } 7590 7591 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 7592 MVT I1ContainerVT = 7593 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 7594 7595 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 7596 7597 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 7598 7599 MVT XLenVT = Subtarget.getXLenVT(); 7600 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 7601 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 7602 7603 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7604 DAG.getUNDEF(ContainerVT), SplatZero, VL); 7605 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7606 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); 7607 SDValue Select = 7608 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal, 7609 SplatZero, DAG.getUNDEF(ContainerVT), VL); 7610 7611 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 7612} 7613 7614SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 7615 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 7616 MVT ExtVT = Op.getSimpleValueType(); 7617 // Only custom-lower extensions from fixed-length vector types. 7618 if (!ExtVT.isFixedLengthVector()) 7619 return Op; 7620 MVT VT = Op.getOperand(0).getSimpleValueType(); 7621 // Grab the canonical container type for the extended type. Infer the smaller 7622 // type from that to ensure the same number of vector elements, as we know 7623 // the LMUL will be sufficient to hold the smaller type. 7624 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 7625 // Get the extended container type manually to ensure the same number of 7626 // vector elements between source and dest. 
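// For example (a sketch, assuming a minimum VLEN of 128): when extending
// v4i8 to v4i32, the v4i32 container is nxv2i32, so we pair it with nxv2i8,
// guaranteeing both containers share the same 2 x vscale element count.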
7627 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 7628 ContainerExtVT.getVectorElementCount()); 7629 7630 SDValue Op1 = 7631 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 7632 7633 SDLoc DL(Op); 7634 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 7635 7636 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 7637 7638 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 7639} 7640 7641// Custom-lower truncations from vectors to mask vectors by using a mask and a 7642// setcc operation: 7643// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 7644SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, 7645 SelectionDAG &DAG) const { 7646 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 7647 SDLoc DL(Op); 7648 EVT MaskVT = Op.getValueType(); 7649 // Only expect to custom-lower truncations to mask types 7650 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 7651 "Unexpected type for vector mask lowering"); 7652 SDValue Src = Op.getOperand(0); 7653 MVT VecVT = Src.getSimpleValueType(); 7654 SDValue Mask, VL; 7655 if (IsVPTrunc) { 7656 Mask = Op.getOperand(1); 7657 VL = Op.getOperand(2); 7658 } 7659 // If this is a fixed vector, we need to convert it to a scalable vector. 7660 MVT ContainerVT = VecVT; 7661 7662 if (VecVT.isFixedLengthVector()) { 7663 ContainerVT = getContainerForFixedLengthVector(VecVT); 7664 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 7665 if (IsVPTrunc) { 7666 MVT MaskContainerVT = 7667 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 7668 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 7669 } 7670 } 7671 7672 if (!IsVPTrunc) { 7673 std::tie(Mask, VL) = 7674 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7675 } 7676 7677 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 7678 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 7679 7680 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7681 DAG.getUNDEF(ContainerVT), SplatOne, VL); 7682 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7683 DAG.getUNDEF(ContainerVT), SplatZero, VL); 7684 7685 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 7686 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, 7687 DAG.getUNDEF(ContainerVT), Mask, VL); 7688 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, 7689 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), 7690 DAG.getUNDEF(MaskContainerVT), Mask, VL}); 7691 if (MaskVT.isFixedLengthVector()) 7692 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 7693 return Trunc; 7694} 7695 7696SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, 7697 SelectionDAG &DAG) const { 7698 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 7699 SDLoc DL(Op); 7700 7701 MVT VT = Op.getSimpleValueType(); 7702 // Only custom-lower vector truncates 7703 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 7704 7705 // Truncates to mask types are handled differently 7706 if (VT.getVectorElementType() == MVT::i1) 7707 return lowerVectorMaskTruncLike(Op, DAG); 7708 7709 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 7710 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 7711 // truncate by one power of two at a time. 
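// For example, truncating i64 elements to i8 elements is emitted as three
// halving steps in the loop below: i64 -> i32 -> i16 -> i8.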
7712 MVT DstEltVT = VT.getVectorElementType(); 7713 7714 SDValue Src = Op.getOperand(0); 7715 MVT SrcVT = Src.getSimpleValueType(); 7716 MVT SrcEltVT = SrcVT.getVectorElementType(); 7717 7718 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && 7719 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 7720 "Unexpected vector truncate lowering"); 7721 7722 MVT ContainerVT = SrcVT; 7723 SDValue Mask, VL; 7724 if (IsVPTrunc) { 7725 Mask = Op.getOperand(1); 7726 VL = Op.getOperand(2); 7727 } 7728 if (SrcVT.isFixedLengthVector()) { 7729 ContainerVT = getContainerForFixedLengthVector(SrcVT); 7730 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 7731 if (IsVPTrunc) { 7732 MVT MaskVT = getMaskTypeFor(ContainerVT); 7733 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7734 } 7735 } 7736 7737 SDValue Result = Src; 7738 if (!IsVPTrunc) { 7739 std::tie(Mask, VL) = 7740 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7741 } 7742 7743 LLVMContext &Context = *DAG.getContext(); 7744 const ElementCount Count = ContainerVT.getVectorElementCount(); 7745 do { 7746 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 7747 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 7748 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 7749 Mask, VL); 7750 } while (SrcEltVT != DstEltVT); 7751 7752 if (SrcVT.isFixedLengthVector()) 7753 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 7754 7755 return Result; 7756} 7757 7758SDValue 7759RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, 7760 SelectionDAG &DAG) const { 7761 SDLoc DL(Op); 7762 SDValue Chain = Op.getOperand(0); 7763 SDValue Src = Op.getOperand(1); 7764 MVT VT = Op.getSimpleValueType(); 7765 MVT SrcVT = Src.getSimpleValueType(); 7766 MVT ContainerVT = VT; 7767 if (VT.isFixedLengthVector()) { 7768 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 7769 ContainerVT = 7770 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 7771 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 7772 } 7773 7774 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7775 7776 // RVV can only widen/truncate fp to types double/half the size as the source. 7777 if ((VT.getVectorElementType() == MVT::f64 && 7778 SrcVT.getVectorElementType() == MVT::f16) || 7779 (VT.getVectorElementType() == MVT::f16 && 7780 SrcVT.getVectorElementType() == MVT::f64)) { 7781 // For double rounding, the intermediate rounding should be round-to-odd. 7782 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 7783 ? RISCVISD::STRICT_FP_EXTEND_VL 7784 : RISCVISD::STRICT_VFNCVT_ROD_VL; 7785 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 7786 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), 7787 Chain, Src, Mask, VL); 7788 Chain = Src.getValue(1); 7789 } 7790 7791 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND 7792 ? RISCVISD::STRICT_FP_EXTEND_VL 7793 : RISCVISD::STRICT_FP_ROUND_VL; 7794 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), 7795 Chain, Src, Mask, VL); 7796 if (VT.isFixedLengthVector()) { 7797 // StrictFP operations have two result values. Their lowered result should 7798 // have same result count. 
7799 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); 7800 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); 7801 } 7802 return Res; 7803} 7804 7805SDValue 7806RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, 7807 SelectionDAG &DAG) const { 7808 bool IsVP = 7809 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; 7810 bool IsExtend = 7811 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; 7812 // RVV can only do truncate fp to types half the size as the source. We 7813 // custom-lower f64->f16 rounds via RVV's round-to-odd float 7814 // conversion instruction. 7815 SDLoc DL(Op); 7816 MVT VT = Op.getSimpleValueType(); 7817 7818 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 7819 7820 SDValue Src = Op.getOperand(0); 7821 MVT SrcVT = Src.getSimpleValueType(); 7822 7823 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || 7824 SrcVT.getVectorElementType() != MVT::f16); 7825 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || 7826 SrcVT.getVectorElementType() != MVT::f64); 7827 7828 bool IsDirectConv = IsDirectExtend || IsDirectTrunc; 7829 7830 // Prepare any fixed-length vector operands. 7831 MVT ContainerVT = VT; 7832 SDValue Mask, VL; 7833 if (IsVP) { 7834 Mask = Op.getOperand(1); 7835 VL = Op.getOperand(2); 7836 } 7837 if (VT.isFixedLengthVector()) { 7838 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 7839 ContainerVT = 7840 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 7841 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 7842 if (IsVP) { 7843 MVT MaskVT = getMaskTypeFor(ContainerVT); 7844 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7845 } 7846 } 7847 7848 if (!IsVP) 7849 std::tie(Mask, VL) = 7850 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 7851 7852 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; 7853 7854 if (IsDirectConv) { 7855 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); 7856 if (VT.isFixedLengthVector()) 7857 Src = convertFromScalableVector(VT, Src, DAG, Subtarget); 7858 return Src; 7859 } 7860 7861 unsigned InterConvOpc = 7862 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; 7863 7864 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 7865 SDValue IntermediateConv = 7866 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); 7867 SDValue Result = 7868 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); 7869 if (VT.isFixedLengthVector()) 7870 return convertFromScalableVector(VT, Result, DAG, Subtarget); 7871 return Result; 7872} 7873 7874// Given a scalable vector type and an index into it, returns the type for the 7875// smallest subvector that the index fits in. This can be used to reduce LMUL 7876// for operations like vslidedown. 7877// 7878// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. 
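// Likewise (still assuming Zvl128b), index 3 in a nxv16i32 fits within the
// first nxv2i32, reducing what would be an LMUL 8 slide to LMUL 1.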
7879static std::optional<MVT> 7880getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, 7881 const RISCVSubtarget &Subtarget) { 7882 assert(VecVT.isScalableVector()); 7883 const unsigned EltSize = VecVT.getScalarSizeInBits(); 7884 const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 7885 const unsigned MinVLMAX = VectorBitsMin / EltSize; 7886 MVT SmallerVT; 7887 if (MaxIdx < MinVLMAX) 7888 SmallerVT = getLMUL1VT(VecVT); 7889 else if (MaxIdx < MinVLMAX * 2) 7890 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT(); 7891 else if (MaxIdx < MinVLMAX * 4) 7892 SmallerVT = getLMUL1VT(VecVT) 7893 .getDoubleNumVectorElementsVT() 7894 .getDoubleNumVectorElementsVT(); 7895 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT)) 7896 return std::nullopt; 7897 return SmallerVT; 7898} 7899 7900// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 7901// first position of a vector, and that vector is slid up to the insert index. 7902// By limiting the active vector length to index+1 and merging with the 7903// original vector (with an undisturbed tail policy for elements >= VL), we 7904// achieve the desired result of leaving all elements untouched except the one 7905// at VL-1, which is replaced with the desired value. 7906SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 7907 SelectionDAG &DAG) const { 7908 SDLoc DL(Op); 7909 MVT VecVT = Op.getSimpleValueType(); 7910 SDValue Vec = Op.getOperand(0); 7911 SDValue Val = Op.getOperand(1); 7912 SDValue Idx = Op.getOperand(2); 7913 7914 if (VecVT.getVectorElementType() == MVT::i1) { 7915 // FIXME: For now we just promote to an i8 vector and insert into that, 7916 // but this is probably not optimal. 7917 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 7918 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 7919 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 7920 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 7921 } 7922 7923 MVT ContainerVT = VecVT; 7924 // If the operand is a fixed-length vector, convert to a scalable one. 7925 if (VecVT.isFixedLengthVector()) { 7926 ContainerVT = getContainerForFixedLengthVector(VecVT); 7927 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 7928 } 7929 7930 // If we know the index we're going to insert at, we can shrink Vec so that 7931 // we're performing the scalar inserts and slideup on a smaller LMUL. 7932 MVT OrigContainerVT = ContainerVT; 7933 SDValue OrigVec = Vec; 7934 SDValue AlignedIdx; 7935 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) { 7936 const unsigned OrigIdx = IdxC->getZExtValue(); 7937 // Do we know an upper bound on LMUL? 7938 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx, 7939 DL, DAG, Subtarget)) { 7940 ContainerVT = *ShrunkVT; 7941 AlignedIdx = DAG.getVectorIdxConstant(0, DL); 7942 } 7943 7944 // If we're compiling for an exact VLEN value, we can always perform 7945 // the insert in m1 as we can determine the register corresponding to 7946 // the index in the register group. 
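// For example (a sketch): with an exact VLEN of 128, an insert at index 6 of
// a v8i32 only touches the second register of the group (element 6 % 4 == 2
// within it), so the scalar insert and slideup below can run at LMUL 1.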
7947 const unsigned MinVLen = Subtarget.getRealMinVLen(); 7948 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 7949 const MVT M1VT = getLMUL1VT(ContainerVT); 7950 if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) { 7951 EVT ElemVT = VecVT.getVectorElementType(); 7952 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 7953 unsigned RemIdx = OrigIdx % ElemsPerVReg; 7954 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 7955 unsigned ExtractIdx = 7956 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 7957 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL); 7958 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 7959 ContainerVT = M1VT; 7960 } 7961 7962 if (AlignedIdx) 7963 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 7964 AlignedIdx); 7965 } 7966 7967 MVT XLenVT = Subtarget.getXLenVT(); 7968 7969 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 7970 // Even i64-element vectors on RV32 can be lowered without scalar 7971 // legalization if the most-significant 32 bits of the value are not affected 7972 // by the sign-extension of the lower 32 bits. 7973 // TODO: We could also catch sign extensions of a 32-bit value. 7974 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 7975 const auto *CVal = cast<ConstantSDNode>(Val); 7976 if (isInt<32>(CVal->getSExtValue())) { 7977 IsLegalInsert = true; 7978 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 7979 } 7980 } 7981 7982 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 7983 7984 SDValue ValInVec; 7985 7986 if (IsLegalInsert) { 7987 unsigned Opc = 7988 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 7989 if (isNullConstant(Idx)) { 7990 if (!VecVT.isFloatingPoint()) 7991 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val); 7992 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 7993 7994 if (AlignedIdx) 7995 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 7996 Vec, AlignedIdx); 7997 if (!VecVT.isFixedLengthVector()) 7998 return Vec; 7999 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 8000 } 8001 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); 8002 } else { 8003 // On RV32, i64-element vectors must be specially handled to place the 8004 // value at element 0, by using two vslide1down instructions in sequence on 8005 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 8006 // this. 8007 SDValue ValLo, ValHi; 8008 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); 8009 MVT I32ContainerVT = 8010 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 8011 SDValue I32Mask = 8012 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 8013 // Limit the active VL to two. 8014 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 8015 // If the Idx is 0 we can insert directly into the vector. 8016 if (isNullConstant(Idx)) { 8017 // First slide in the lo value, then the hi in above it. We use slide1down 8018 // to avoid the register group overlap constraint of vslide1up. 8019 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8020 Vec, Vec, ValLo, I32Mask, InsertI64VL); 8021 // If the source vector is undef don't pass along the tail elements from 8022 // the previous slide1down. 8023 SDValue Tail = Vec.isUndef() ? 
Vec : ValInVec; 8024 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8025 Tail, ValInVec, ValHi, I32Mask, InsertI64VL); 8026 // Bitcast back to the right container type. 8027 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8028 8029 if (AlignedIdx) 8030 ValInVec = 8031 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8032 ValInVec, AlignedIdx); 8033 if (!VecVT.isFixedLengthVector()) 8034 return ValInVec; 8035 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); 8036 } 8037 8038 // First slide in the lo value, then the hi in above it. We use slide1down 8039 // to avoid the register group overlap constraint of vslide1up. 8040 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8041 DAG.getUNDEF(I32ContainerVT), 8042 DAG.getUNDEF(I32ContainerVT), ValLo, 8043 I32Mask, InsertI64VL); 8044 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 8045 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, 8046 I32Mask, InsertI64VL); 8047 // Bitcast back to the right container type. 8048 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 8049 } 8050 8051 // Now that the value is in a vector, slide it into position. 8052 SDValue InsertVL = 8053 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 8054 8055 // Use tail agnostic policy if Idx is the last index of Vec. 8056 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 8057 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && 8058 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) 8059 Policy = RISCVII::TAIL_AGNOSTIC; 8060 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, 8061 Idx, Mask, InsertVL, Policy); 8062 8063 if (AlignedIdx) 8064 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, 8065 Slideup, AlignedIdx); 8066 if (!VecVT.isFixedLengthVector()) 8067 return Slideup; 8068 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 8069} 8070 8071// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 8072// extract the first element: (extractelt (slidedown vec, idx), 0). For integer 8073// types this is done using VMV_X_S to allow us to glean information about the 8074// sign bits of the result. 8075SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 8076 SelectionDAG &DAG) const { 8077 SDLoc DL(Op); 8078 SDValue Idx = Op.getOperand(1); 8079 SDValue Vec = Op.getOperand(0); 8080 EVT EltVT = Op.getValueType(); 8081 MVT VecVT = Vec.getSimpleValueType(); 8082 MVT XLenVT = Subtarget.getXLenVT(); 8083 8084 if (VecVT.getVectorElementType() == MVT::i1) { 8085 // Use vfirst.m to extract the first bit. 
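// vfirst.m returns the index of the first set mask bit, or -1 if the mask
// is all zeros, so element 0 is set exactly when the result is 0.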
8086 if (isNullConstant(Idx)) { 8087 MVT ContainerVT = VecVT; 8088 if (VecVT.isFixedLengthVector()) { 8089 ContainerVT = getContainerForFixedLengthVector(VecVT); 8090 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8091 } 8092 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 8093 SDValue Vfirst = 8094 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); 8095 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst, 8096 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); 8097 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8098 } 8099 if (VecVT.isFixedLengthVector()) { 8100 unsigned NumElts = VecVT.getVectorNumElements(); 8101 if (NumElts >= 8) { 8102 MVT WideEltVT; 8103 unsigned WidenVecLen; 8104 SDValue ExtractElementIdx; 8105 SDValue ExtractBitIdx; 8106 unsigned MaxEEW = Subtarget.getELen(); 8107 MVT LargestEltVT = MVT::getIntegerVT( 8108 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); 8109 if (NumElts <= LargestEltVT.getSizeInBits()) { 8110 assert(isPowerOf2_32(NumElts) && 8111 "the number of elements should be power of 2"); 8112 WideEltVT = MVT::getIntegerVT(NumElts); 8113 WidenVecLen = 1; 8114 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); 8115 ExtractBitIdx = Idx; 8116 } else { 8117 WideEltVT = LargestEltVT; 8118 WidenVecLen = NumElts / WideEltVT.getSizeInBits(); 8119 // extract element index = index / element width 8120 ExtractElementIdx = DAG.getNode( 8121 ISD::SRL, DL, XLenVT, Idx, 8122 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); 8123 // mask bit index = index % element width 8124 ExtractBitIdx = DAG.getNode( 8125 ISD::AND, DL, XLenVT, Idx, 8126 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); 8127 } 8128 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); 8129 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); 8130 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, 8131 Vec, ExtractElementIdx); 8132 // Extract the bit from GPR. 8133 SDValue ShiftRight = 8134 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); 8135 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, 8136 DAG.getConstant(1, DL, XLenVT)); 8137 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); 8138 } 8139 } 8140 // Otherwise, promote to an i8 vector and extract from that. 8141 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 8142 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 8143 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 8144 } 8145 8146 // If this is a fixed vector, we need to convert it to a scalable vector. 8147 MVT ContainerVT = VecVT; 8148 if (VecVT.isFixedLengthVector()) { 8149 ContainerVT = getContainerForFixedLengthVector(VecVT); 8150 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8151 } 8152 8153 // If we're compiling for an exact VLEN value and we have a known 8154 // constant index, we can always perform the extract in m1 (or 8155 // smaller) as we can determine the register corresponding to 8156 // the index in the register group. 
8157 const unsigned MinVLen = Subtarget.getRealMinVLen(); 8158 const unsigned MaxVLen = Subtarget.getRealMaxVLen(); 8159 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx); 8160 IdxC && MinVLen == MaxVLen && 8161 VecVT.getSizeInBits().getKnownMinValue() > MinVLen) { 8162 MVT M1VT = getLMUL1VT(ContainerVT); 8163 unsigned OrigIdx = IdxC->getZExtValue(); 8164 EVT ElemVT = VecVT.getVectorElementType(); 8165 unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); 8166 unsigned RemIdx = OrigIdx % ElemsPerVReg; 8167 unsigned SubRegIdx = OrigIdx / ElemsPerVReg; 8168 unsigned ExtractIdx = 8169 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); 8170 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 8171 DAG.getVectorIdxConstant(ExtractIdx, DL)); 8172 Idx = DAG.getVectorIdxConstant(RemIdx, DL); 8173 ContainerVT = M1VT; 8174 } 8175 8176 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which 8177 // contains our index. 8178 std::optional<uint64_t> MaxIdx; 8179 if (VecVT.isFixedLengthVector()) 8180 MaxIdx = VecVT.getVectorNumElements() - 1; 8181 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) 8182 MaxIdx = IdxC->getZExtValue(); 8183 if (MaxIdx) { 8184 if (auto SmallerVT = 8185 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) { 8186 ContainerVT = *SmallerVT; 8187 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 8188 DAG.getConstant(0, DL, XLenVT)); 8189 } 8190 } 8191 8192 // If after narrowing, the required slide is still greater than LMUL2, 8193 // fallback to generic expansion and go through the stack. This is done 8194 // for a subtle reason: extracting *all* elements out of a vector is 8195 // widely expected to be linear in vector size, but because vslidedown 8196 // is linear in LMUL, performing N extracts using vslidedown becomes 8197 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack 8198 // seems to have the same problem (the store is linear in LMUL), but the 8199 // generic expansion *memoizes* the store, and thus for many extracts of 8200 // the same vector we end up with one store and a bunch of loads. 8201 // TODO: We don't have the same code for insert_vector_elt because we 8202 // have BUILD_VECTOR and handle the degenerate case there. Should we 8203 // consider adding an inverse BUILD_VECTOR node? 8204 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT(); 8205 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector()) 8206 return SDValue(); 8207 8208 // If the index is 0, the vector is already in the right position. 8209 if (!isNullConstant(Idx)) { 8210 // Use a VL of 1 to avoid processing more elements than we need. 8211 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 8212 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 8213 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 8214 } 8215 8216 if (!EltVT.isInteger()) { 8217 // Floating-point extracts are handled in TableGen. 8218 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 8219 DAG.getConstant(0, DL, XLenVT)); 8220 } 8221 8222 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 8223 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 8224} 8225 8226// Some RVV intrinsics may claim that they want an integer operand to be 8227// promoted or expanded. 
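// For example (a sketch): an intrinsic whose scalar operand is narrower than
// XLEN simply has that operand promoted to XLenVT, while on RV32 an i64
// scalar for an SEW=64 operation is truncated if it is known to be
// sign-extended, split and splatted otherwise, or, for vslide1up/vslide1down,
// expanded into a pair of SEW=32 slides.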
8228static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, 8229 const RISCVSubtarget &Subtarget) { 8230 assert((Op.getOpcode() == ISD::INTRINSIC_VOID || 8231 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 8232 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 8233 "Unexpected opcode"); 8234 8235 if (!Subtarget.hasVInstructions()) 8236 return SDValue(); 8237 8238 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || 8239 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 8240 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 8241 8242 SDLoc DL(Op); 8243 8244 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 8245 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 8246 if (!II || !II->hasScalarOperand()) 8247 return SDValue(); 8248 8249 unsigned SplatOp = II->ScalarOperand + 1 + HasChain; 8250 assert(SplatOp < Op.getNumOperands()); 8251 8252 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 8253 SDValue &ScalarOp = Operands[SplatOp]; 8254 MVT OpVT = ScalarOp.getSimpleValueType(); 8255 MVT XLenVT = Subtarget.getXLenVT(); 8256 8257 // If this isn't a scalar, or its type is XLenVT we're done. 8258 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 8259 return SDValue(); 8260 8261 // Simplest case is that the operand needs to be promoted to XLenVT. 8262 if (OpVT.bitsLT(XLenVT)) { 8263 // If the operand is a constant, sign extend to increase our chances 8264 // of being able to use a .vi instruction. ANY_EXTEND would become a 8265 // a zero extend and the simm5 check in isel would fail. 8266 // FIXME: Should we ignore the upper bits in isel instead? 8267 unsigned ExtOpc = 8268 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 8269 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 8270 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 8271 } 8272 8273 // Use the previous operand to get the vXi64 VT. The result might be a mask 8274 // VT for compares. Using the previous operand assumes that the previous 8275 // operand will never have a smaller element size than a scalar operand and 8276 // that a widening operation never uses SEW=64. 8277 // NOTE: If this fails the below assert, we can probably just find the 8278 // element count from any operand or result and use it to construct the VT. 8279 assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); 8280 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); 8281 8282 // The more complex case is when the scalar is larger than XLenVT. 8283 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 8284 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 8285 8286 // If this is a sign-extended 32-bit value, we can truncate it and rely on the 8287 // instruction to sign-extend since SEW>XLEN. 8288 if (DAG.ComputeNumSignBits(ScalarOp) > 32) { 8289 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); 8290 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 8291 } 8292 8293 switch (IntNo) { 8294 case Intrinsic::riscv_vslide1up: 8295 case Intrinsic::riscv_vslide1down: 8296 case Intrinsic::riscv_vslide1up_mask: 8297 case Intrinsic::riscv_vslide1down_mask: { 8298 // We need to special case these when the scalar is larger than XLen. 8299 unsigned NumOps = Op.getNumOperands(); 8300 bool IsMasked = NumOps == 7; 8301 8302 // Convert the vector source to the equivalent nxvXi32 vector. 
8303 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8304 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8305 SDValue ScalarLo, ScalarHi;
8306 std::tie(ScalarLo, ScalarHi) =
8307 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8308
8309 // Double the VL since we halved SEW.
8310 SDValue AVL = getVLOperand(Op);
8311 SDValue I32VL;
8312
8313 // Optimize for constant AVL
8314 if (isa<ConstantSDNode>(AVL)) {
8315 const auto [MinVLMAX, MaxVLMAX] =
8316 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8317
8318 uint64_t AVLInt = AVL->getAsZExtVal();
8319 if (AVLInt <= MinVLMAX) {
8320 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8321 } else if (AVLInt >= 2 * MaxVLMAX) {
8322 // Just set vl to VLMAX in this situation
8323 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8324 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8325 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8326 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8327 SDValue SETVLMAX = DAG.getTargetConstant(
8328 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8329 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8330 LMUL);
8331 } else {
8332 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8333 // depends on the hardware implementation,
8334 // so let the non-constant path below handle it.
8335 }
8336 }
8337 if (!I32VL) {
8338 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8339 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8340 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8341 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8342 SDValue SETVL =
8343 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8344 // Use the vsetvli instruction to get the actual vector length, which
8345 // depends on the hardware implementation.
8346 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8347 SEW, LMUL);
8348 I32VL =
8349 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8350 }
8351
8352 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8353
8354 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8355 // instructions.
8356 SDValue Passthru;
8357 if (IsMasked)
8358 Passthru = DAG.getUNDEF(I32VT);
8359 else
8360 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8361
8362 if (IntNo == Intrinsic::riscv_vslide1up ||
8363 IntNo == Intrinsic::riscv_vslide1up_mask) {
8364 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8365 ScalarHi, I32Mask, I32VL);
8366 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8367 ScalarLo, I32Mask, I32VL);
8368 } else {
8369 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8370 ScalarLo, I32Mask, I32VL);
8371 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8372 ScalarHi, I32Mask, I32VL);
8373 }
8374
8375 // Convert back to nxvXi64.
8376 Vec = DAG.getBitcast(VT, Vec);
8377
8378 if (!IsMasked)
8379 return Vec;
8380 // Apply mask after the operation.
8381 SDValue Mask = Operands[NumOps - 3];
8382 SDValue MaskedOff = Operands[1];
8383 // Assume Policy operand is the last operand.
8384 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8385 // We don't need to select maskedoff if it's undef.
8386 if (MaskedOff.isUndef())
8387 return Vec;
8388 // TAMU (tail agnostic, mask undisturbed)
8389 if (Policy == RISCVII::TAIL_AGNOSTIC)
8390 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8391 DAG.getUNDEF(VT), AVL);
8392 // TUMA or TUMU: Currently we always emit a tumu policy regardless of tuma.
8393 // That's fine because vmerge does not care about the mask policy.
8394 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8395 MaskedOff, AVL);
8396 }
8397 }
8398
8399 // We need to convert the scalar to a splat vector.
8400 SDValue VL = getVLOperand(Op);
8401 assert(VL.getValueType() == XLenVT);
8402 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8403 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8404}
8405
8406 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8407 // scalable vector llvm.get.vector.length for now.
8408 //
8409 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
8410 // (vscale * VF). The vscale and VF are independent of element width. We use
8411 // SEW=8 for the vsetvli because it is the only element width that supports all
8412 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8413 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8414 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8415 // SEW and LMUL are better for the surrounding vector instructions.
8416 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8417 const RISCVSubtarget &Subtarget) {
8418 MVT XLenVT = Subtarget.getXLenVT();
8419
8420 // The smallest LMUL is only valid for the smallest element width.
8421 const unsigned ElementWidth = 8;
8422
8423 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8424 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8425 // We don't support VF==1 with ELEN==32.
8426 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen();
8427
8428 unsigned VF = N->getConstantOperandVal(2);
8429 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8430 "Unexpected VF");
8431 (void)MinVF;
8432
8433 bool Fractional = VF < LMul1VF;
8434 unsigned LMulVal = Fractional ?
LMul1VF / VF : VF / LMul1VF; 8435 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); 8436 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); 8437 8438 SDLoc DL(N); 8439 8440 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); 8441 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); 8442 8443 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); 8444 8445 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); 8446 SDValue Res = 8447 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); 8448 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); 8449} 8450 8451static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG, 8452 SmallVector<SDValue> &Ops) { 8453 SDLoc DL(Op); 8454 8455 const RISCVSubtarget &Subtarget = 8456 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); 8457 for (const SDValue &V : Op->op_values()) { 8458 EVT ValType = V.getValueType(); 8459 if (ValType.isScalableVector() && ValType.isFloatingPoint()) { 8460 MVT InterimIVT = 8461 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()), 8462 ValType.getVectorElementCount()); 8463 Ops.push_back(DAG.getBitcast(InterimIVT, V)); 8464 } else if (ValType.isFixedLengthVector()) { 8465 MVT OpContainerVT = getContainerForFixedLengthVector( 8466 DAG, V.getSimpleValueType(), Subtarget); 8467 Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget)); 8468 } else 8469 Ops.push_back(V); 8470 } 8471} 8472 8473// LMUL * VLEN should be greater than or equal to EGS * SEW 8474static inline bool isValidEGW(int EGS, EVT VT, 8475 const RISCVSubtarget &Subtarget) { 8476 return (Subtarget.getRealMinVLen() * 8477 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= 8478 EGS * VT.getScalarSizeInBits(); 8479} 8480 8481SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 8482 SelectionDAG &DAG) const { 8483 unsigned IntNo = Op.getConstantOperandVal(0); 8484 SDLoc DL(Op); 8485 MVT XLenVT = Subtarget.getXLenVT(); 8486 8487 switch (IntNo) { 8488 default: 8489 break; // Don't custom lower most intrinsics. 
8490 case Intrinsic::thread_pointer: { 8491 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8492 return DAG.getRegister(RISCV::X4, PtrVT); 8493 } 8494 case Intrinsic::riscv_orc_b: 8495 case Intrinsic::riscv_brev8: 8496 case Intrinsic::riscv_sha256sig0: 8497 case Intrinsic::riscv_sha256sig1: 8498 case Intrinsic::riscv_sha256sum0: 8499 case Intrinsic::riscv_sha256sum1: 8500 case Intrinsic::riscv_sm3p0: 8501 case Intrinsic::riscv_sm3p1: { 8502 unsigned Opc; 8503 switch (IntNo) { 8504 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 8505 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 8506 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 8507 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 8508 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 8509 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 8510 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 8511 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 8512 } 8513 8514 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8515 SDValue NewOp = 8516 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8517 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 8518 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8519 } 8520 8521 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 8522 } 8523 case Intrinsic::riscv_sm4ks: 8524 case Intrinsic::riscv_sm4ed: { 8525 unsigned Opc = 8526 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 8527 8528 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8529 SDValue NewOp0 = 8530 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8531 SDValue NewOp1 = 8532 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8533 SDValue Res = 8534 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); 8535 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8536 } 8537 8538 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), 8539 Op.getOperand(3)); 8540 } 8541 case Intrinsic::riscv_zip: 8542 case Intrinsic::riscv_unzip: { 8543 unsigned Opc = 8544 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; 8545 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 8546 } 8547 case Intrinsic::riscv_clmul: 8548 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8549 SDValue NewOp0 = 8550 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8551 SDValue NewOp1 = 8552 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8553 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 8554 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8555 } 8556 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), 8557 Op.getOperand(2)); 8558 case Intrinsic::riscv_clmulh: 8559 case Intrinsic::riscv_clmulr: { 8560 unsigned Opc = 8561 IntNo == Intrinsic::riscv_clmulh ? 
RISCVISD::CLMULH : RISCVISD::CLMULR; 8562 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { 8563 SDValue NewOp0 = 8564 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); 8565 SDValue NewOp1 = 8566 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); 8567 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 8568 DAG.getConstant(32, DL, MVT::i64)); 8569 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 8570 DAG.getConstant(32, DL, MVT::i64)); 8571 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 8572 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 8573 DAG.getConstant(32, DL, MVT::i64)); 8574 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); 8575 } 8576 8577 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 8578 } 8579 case Intrinsic::experimental_get_vector_length: 8580 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); 8581 case Intrinsic::riscv_vmv_x_s: { 8582 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); 8583 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); 8584 } 8585 case Intrinsic::riscv_vfmv_f_s: 8586 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 8587 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); 8588 case Intrinsic::riscv_vmv_v_x: 8589 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 8590 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, 8591 Subtarget); 8592 case Intrinsic::riscv_vfmv_v_f: 8593 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 8594 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 8595 case Intrinsic::riscv_vmv_s_x: { 8596 SDValue Scalar = Op.getOperand(2); 8597 8598 if (Scalar.getValueType().bitsLE(XLenVT)) { 8599 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 8600 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 8601 Op.getOperand(1), Scalar, Op.getOperand(3)); 8602 } 8603 8604 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 8605 8606 // This is an i64 value that lives in two scalar registers. We have to 8607 // insert this in a convoluted way. First we build vXi64 splat containing 8608 // the two values that we assemble using some bit math. Next we'll use 8609 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 8610 // to merge element 0 from our splat into the source vector. 8611 // FIXME: This is probably not the best way to do this, but it is 8612 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 8613 // point. 
8614 // sw lo, (a0) 8615 // sw hi, 4(a0) 8616 // vlse vX, (a0) 8617 // 8618 // vid.v vVid 8619 // vmseq.vx mMask, vVid, 0 8620 // vmerge.vvm vDest, vSrc, vVal, mMask 8621 MVT VT = Op.getSimpleValueType(); 8622 SDValue Vec = Op.getOperand(1); 8623 SDValue VL = getVLOperand(Op); 8624 8625 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); 8626 if (Op.getOperand(1).isUndef()) 8627 return SplattedVal; 8628 SDValue SplattedIdx = 8629 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 8630 DAG.getConstant(0, DL, MVT::i32), VL); 8631 8632 MVT MaskVT = getMaskTypeFor(VT); 8633 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); 8634 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 8635 SDValue SelectCond = 8636 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 8637 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), 8638 DAG.getUNDEF(MaskVT), Mask, VL}); 8639 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, 8640 Vec, DAG.getUNDEF(VT), VL); 8641 } 8642 case Intrinsic::riscv_vfmv_s_f: 8643 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), 8644 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 8645 // EGS * EEW >= 128 bits 8646 case Intrinsic::riscv_vaesdf_vv: 8647 case Intrinsic::riscv_vaesdf_vs: 8648 case Intrinsic::riscv_vaesdm_vv: 8649 case Intrinsic::riscv_vaesdm_vs: 8650 case Intrinsic::riscv_vaesef_vv: 8651 case Intrinsic::riscv_vaesef_vs: 8652 case Intrinsic::riscv_vaesem_vv: 8653 case Intrinsic::riscv_vaesem_vs: 8654 case Intrinsic::riscv_vaeskf1: 8655 case Intrinsic::riscv_vaeskf2: 8656 case Intrinsic::riscv_vaesz_vs: 8657 case Intrinsic::riscv_vsm4k: 8658 case Intrinsic::riscv_vsm4r_vv: 8659 case Intrinsic::riscv_vsm4r_vs: { 8660 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 8661 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 8662 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 8663 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 8664 return Op; 8665 } 8666 // EGS * EEW >= 256 bits 8667 case Intrinsic::riscv_vsm3c: 8668 case Intrinsic::riscv_vsm3me: { 8669 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || 8670 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) 8671 report_fatal_error("EGW should be greater than or equal to 8 * SEW."); 8672 return Op; 8673 } 8674 // zvknha(SEW=32)/zvknhb(SEW=[32|64]) 8675 case Intrinsic::riscv_vsha2ch: 8676 case Intrinsic::riscv_vsha2cl: 8677 case Intrinsic::riscv_vsha2ms: { 8678 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && 8679 !Subtarget.hasStdExtZvknhb()) 8680 report_fatal_error("SEW=64 needs Zvknhb to be enabled."); 8681 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || 8682 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || 8683 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) 8684 report_fatal_error("EGW should be greater than or equal to 4 * SEW."); 8685 return Op; 8686 } 8687 case Intrinsic::riscv_sf_vc_v_x: 8688 case Intrinsic::riscv_sf_vc_v_i: 8689 case Intrinsic::riscv_sf_vc_v_xv: 8690 case Intrinsic::riscv_sf_vc_v_iv: 8691 case Intrinsic::riscv_sf_vc_v_vv: 8692 case Intrinsic::riscv_sf_vc_v_fv: 8693 case Intrinsic::riscv_sf_vc_v_xvv: 8694 case Intrinsic::riscv_sf_vc_v_ivv: 8695 case Intrinsic::riscv_sf_vc_v_vvv: 8696 case Intrinsic::riscv_sf_vc_v_fvv: 8697 case Intrinsic::riscv_sf_vc_v_xvw: 8698 case Intrinsic::riscv_sf_vc_v_ivw: 8699 case Intrinsic::riscv_sf_vc_v_vvw: 8700 case 
Intrinsic::riscv_sf_vc_v_fvw: {
8701 MVT VT = Op.getSimpleValueType();
8702
8703 SmallVector<SDValue> Ops;
8704 getVCIXOperands(Op, DAG, Ops);
8705
8706 MVT RetVT = VT;
8707 if (VT.isFixedLengthVector())
8708 RetVT = getContainerForFixedLengthVector(VT);
8709 else if (VT.isFloatingPoint())
8710 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
8711 VT.getVectorElementCount());
8712
8713 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
8714
8715 if (VT.isFixedLengthVector())
8716 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
8717 else if (VT.isFloatingPoint())
8718 NewNode = DAG.getBitcast(VT, NewNode);
8719
8720 if (Op == NewNode)
8721 break;
8722
8723 return NewNode;
8724 }
8725 }
8726
8727 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
8728}
8729
8730SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
8731 SelectionDAG &DAG) const {
8732 unsigned IntNo = Op.getConstantOperandVal(1);
8733 switch (IntNo) {
8734 default:
8735 break;
8736 case Intrinsic::riscv_masked_strided_load: {
8737 SDLoc DL(Op);
8738 MVT XLenVT = Subtarget.getXLenVT();
8739
8740 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
8741 // the selection of the masked intrinsics doesn't do this for us.
8742 SDValue Mask = Op.getOperand(5);
8743 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8744
8745 MVT VT = Op->getSimpleValueType(0);
8746 MVT ContainerVT = VT;
8747 if (VT.isFixedLengthVector())
8748 ContainerVT = getContainerForFixedLengthVector(VT);
8749
8750 SDValue PassThru = Op.getOperand(2);
8751 if (!IsUnmasked) {
8752 MVT MaskVT = getMaskTypeFor(ContainerVT);
8753 if (VT.isFixedLengthVector()) {
8754 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8755 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8756 }
8757 }
8758
8759 auto *Load = cast<MemIntrinsicSDNode>(Op);
8760 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8761 SDValue Ptr = Op.getOperand(3);
8762 SDValue Stride = Op.getOperand(4);
8763 SDValue Result, Chain;
8764
8765 // TODO: We currently restrict this to unmasked loads in consideration of
8766 // the complexity of handling all-false masks.
8767 if (IsUnmasked && isNullConstant(Stride)) {
8768 MVT ScalarVT = ContainerVT.getVectorElementType();
8769 SDValue ScalarLoad =
8770 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
8771 ScalarVT, Load->getMemOperand());
8772 Chain = ScalarLoad.getValue(1);
8773 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
8774 Subtarget);
8775 } else {
8776 SDValue IntID = DAG.getTargetConstant(
8777 IsUnmasked ?
Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 8778 XLenVT); 8779 8780 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 8781 if (IsUnmasked) 8782 Ops.push_back(DAG.getUNDEF(ContainerVT)); 8783 else 8784 Ops.push_back(PassThru); 8785 Ops.push_back(Ptr); 8786 Ops.push_back(Stride); 8787 if (!IsUnmasked) 8788 Ops.push_back(Mask); 8789 Ops.push_back(VL); 8790 if (!IsUnmasked) { 8791 SDValue Policy = 8792 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 8793 Ops.push_back(Policy); 8794 } 8795 8796 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 8797 Result = 8798 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 8799 Load->getMemoryVT(), Load->getMemOperand()); 8800 Chain = Result.getValue(1); 8801 } 8802 if (VT.isFixedLengthVector()) 8803 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 8804 return DAG.getMergeValues({Result, Chain}, DL); 8805 } 8806 case Intrinsic::riscv_seg2_load: 8807 case Intrinsic::riscv_seg3_load: 8808 case Intrinsic::riscv_seg4_load: 8809 case Intrinsic::riscv_seg5_load: 8810 case Intrinsic::riscv_seg6_load: 8811 case Intrinsic::riscv_seg7_load: 8812 case Intrinsic::riscv_seg8_load: { 8813 SDLoc DL(Op); 8814 static const Intrinsic::ID VlsegInts[7] = { 8815 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 8816 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 8817 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 8818 Intrinsic::riscv_vlseg8}; 8819 unsigned NF = Op->getNumValues() - 1; 8820 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 8821 MVT XLenVT = Subtarget.getXLenVT(); 8822 MVT VT = Op->getSimpleValueType(0); 8823 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8824 8825 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 8826 Subtarget); 8827 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); 8828 auto *Load = cast<MemIntrinsicSDNode>(Op); 8829 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); 8830 ContainerVTs.push_back(MVT::Other); 8831 SDVTList VTs = DAG.getVTList(ContainerVTs); 8832 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; 8833 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); 8834 Ops.push_back(Op.getOperand(2)); 8835 Ops.push_back(VL); 8836 SDValue Result = 8837 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 8838 Load->getMemoryVT(), Load->getMemOperand()); 8839 SmallVector<SDValue, 9> Results; 8840 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) 8841 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), 8842 DAG, Subtarget)); 8843 Results.push_back(Result.getValue(NF)); 8844 return DAG.getMergeValues(Results, DL); 8845 } 8846 case Intrinsic::riscv_sf_vc_v_x_se: 8847 case Intrinsic::riscv_sf_vc_v_i_se: 8848 case Intrinsic::riscv_sf_vc_v_xv_se: 8849 case Intrinsic::riscv_sf_vc_v_iv_se: 8850 case Intrinsic::riscv_sf_vc_v_vv_se: 8851 case Intrinsic::riscv_sf_vc_v_fv_se: 8852 case Intrinsic::riscv_sf_vc_v_xvv_se: 8853 case Intrinsic::riscv_sf_vc_v_ivv_se: 8854 case Intrinsic::riscv_sf_vc_v_vvv_se: 8855 case Intrinsic::riscv_sf_vc_v_fvv_se: 8856 case Intrinsic::riscv_sf_vc_v_xvw_se: 8857 case Intrinsic::riscv_sf_vc_v_ivw_se: 8858 case Intrinsic::riscv_sf_vc_v_vvw_se: 8859 case Intrinsic::riscv_sf_vc_v_fvw_se: { 8860 MVT VT = Op.getSimpleValueType(); 8861 SDLoc DL(Op); 8862 SmallVector<SDValue> Ops; 8863 getVCIXOperands(Op, DAG, Ops); 8864 8865 MVT RetVT = VT; 8866 if (VT.isFixedLengthVector()) 8867 RetVT = getContainerForFixedLengthVector(VT); 8868 else if (VT.isFloatingPoint()) 8869 RetVT 
= MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()), 8870 RetVT.getVectorElementCount()); 8871 8872 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); 8873 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops); 8874 8875 if (VT.isFixedLengthVector()) { 8876 SDValue FixedVector = 8877 convertFromScalableVector(VT, NewNode, DAG, Subtarget); 8878 NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL); 8879 } else if (VT.isFloatingPoint()) { 8880 SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0)); 8881 NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL); 8882 } 8883 8884 if (Op == NewNode) 8885 break; 8886 8887 return NewNode; 8888 } 8889 } 8890 8891 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 8892} 8893 8894SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 8895 SelectionDAG &DAG) const { 8896 unsigned IntNo = Op.getConstantOperandVal(1); 8897 switch (IntNo) { 8898 default: 8899 break; 8900 case Intrinsic::riscv_masked_strided_store: { 8901 SDLoc DL(Op); 8902 MVT XLenVT = Subtarget.getXLenVT(); 8903 8904 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 8905 // the selection of the masked intrinsics doesn't do this for us. 8906 SDValue Mask = Op.getOperand(5); 8907 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 8908 8909 SDValue Val = Op.getOperand(2); 8910 MVT VT = Val.getSimpleValueType(); 8911 MVT ContainerVT = VT; 8912 if (VT.isFixedLengthVector()) { 8913 ContainerVT = getContainerForFixedLengthVector(VT); 8914 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 8915 } 8916 if (!IsUnmasked) { 8917 MVT MaskVT = getMaskTypeFor(ContainerVT); 8918 if (VT.isFixedLengthVector()) 8919 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 8920 } 8921 8922 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 8923 8924 SDValue IntID = DAG.getTargetConstant( 8925 IsUnmasked ? 
Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 8926 XLenVT); 8927 8928 auto *Store = cast<MemIntrinsicSDNode>(Op); 8929 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 8930 Ops.push_back(Val); 8931 Ops.push_back(Op.getOperand(3)); // Ptr 8932 Ops.push_back(Op.getOperand(4)); // Stride 8933 if (!IsUnmasked) 8934 Ops.push_back(Mask); 8935 Ops.push_back(VL); 8936 8937 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 8938 Ops, Store->getMemoryVT(), 8939 Store->getMemOperand()); 8940 } 8941 case Intrinsic::riscv_seg2_store: 8942 case Intrinsic::riscv_seg3_store: 8943 case Intrinsic::riscv_seg4_store: 8944 case Intrinsic::riscv_seg5_store: 8945 case Intrinsic::riscv_seg6_store: 8946 case Intrinsic::riscv_seg7_store: 8947 case Intrinsic::riscv_seg8_store: { 8948 SDLoc DL(Op); 8949 static const Intrinsic::ID VssegInts[] = { 8950 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 8951 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 8952 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 8953 Intrinsic::riscv_vsseg8}; 8954 // Operands are (chain, int_id, vec*, ptr, vl) 8955 unsigned NF = Op->getNumOperands() - 4; 8956 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 8957 MVT XLenVT = Subtarget.getXLenVT(); 8958 MVT VT = Op->getOperand(2).getSimpleValueType(); 8959 MVT ContainerVT = getContainerForFixedLengthVector(VT); 8960 8961 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 8962 Subtarget); 8963 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); 8964 SDValue Ptr = Op->getOperand(NF + 2); 8965 8966 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); 8967 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; 8968 for (unsigned i = 0; i < NF; i++) 8969 Ops.push_back(convertToScalableVector( 8970 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget)); 8971 Ops.append({Ptr, VL}); 8972 8973 return DAG.getMemIntrinsicNode( 8974 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, 8975 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); 8976 } 8977 case Intrinsic::riscv_sf_vc_x_se_e8mf8: 8978 case Intrinsic::riscv_sf_vc_x_se_e8mf4: 8979 case Intrinsic::riscv_sf_vc_x_se_e8mf2: 8980 case Intrinsic::riscv_sf_vc_x_se_e8m1: 8981 case Intrinsic::riscv_sf_vc_x_se_e8m2: 8982 case Intrinsic::riscv_sf_vc_x_se_e8m4: 8983 case Intrinsic::riscv_sf_vc_x_se_e8m8: 8984 case Intrinsic::riscv_sf_vc_x_se_e16mf4: 8985 case Intrinsic::riscv_sf_vc_x_se_e16mf2: 8986 case Intrinsic::riscv_sf_vc_x_se_e16m1: 8987 case Intrinsic::riscv_sf_vc_x_se_e16m2: 8988 case Intrinsic::riscv_sf_vc_x_se_e16m4: 8989 case Intrinsic::riscv_sf_vc_x_se_e16m8: 8990 case Intrinsic::riscv_sf_vc_x_se_e32mf2: 8991 case Intrinsic::riscv_sf_vc_x_se_e32m1: 8992 case Intrinsic::riscv_sf_vc_x_se_e32m2: 8993 case Intrinsic::riscv_sf_vc_x_se_e32m4: 8994 case Intrinsic::riscv_sf_vc_x_se_e32m8: 8995 case Intrinsic::riscv_sf_vc_x_se_e64m1: 8996 case Intrinsic::riscv_sf_vc_x_se_e64m2: 8997 case Intrinsic::riscv_sf_vc_x_se_e64m4: 8998 case Intrinsic::riscv_sf_vc_x_se_e64m8: 8999 case Intrinsic::riscv_sf_vc_i_se_e8mf8: 9000 case Intrinsic::riscv_sf_vc_i_se_e8mf4: 9001 case Intrinsic::riscv_sf_vc_i_se_e8mf2: 9002 case Intrinsic::riscv_sf_vc_i_se_e8m1: 9003 case Intrinsic::riscv_sf_vc_i_se_e8m2: 9004 case Intrinsic::riscv_sf_vc_i_se_e8m4: 9005 case Intrinsic::riscv_sf_vc_i_se_e8m8: 9006 case Intrinsic::riscv_sf_vc_i_se_e16mf4: 9007 case Intrinsic::riscv_sf_vc_i_se_e16mf2: 9008 case Intrinsic::riscv_sf_vc_i_se_e16m1: 9009 case 
Intrinsic::riscv_sf_vc_i_se_e16m2: 9010 case Intrinsic::riscv_sf_vc_i_se_e16m4: 9011 case Intrinsic::riscv_sf_vc_i_se_e16m8: 9012 case Intrinsic::riscv_sf_vc_i_se_e32mf2: 9013 case Intrinsic::riscv_sf_vc_i_se_e32m1: 9014 case Intrinsic::riscv_sf_vc_i_se_e32m2: 9015 case Intrinsic::riscv_sf_vc_i_se_e32m4: 9016 case Intrinsic::riscv_sf_vc_i_se_e32m8: 9017 case Intrinsic::riscv_sf_vc_i_se_e64m1: 9018 case Intrinsic::riscv_sf_vc_i_se_e64m2: 9019 case Intrinsic::riscv_sf_vc_i_se_e64m4: 9020 case Intrinsic::riscv_sf_vc_i_se_e64m8: 9021 case Intrinsic::riscv_sf_vc_xv_se: 9022 case Intrinsic::riscv_sf_vc_iv_se: 9023 case Intrinsic::riscv_sf_vc_vv_se: 9024 case Intrinsic::riscv_sf_vc_fv_se: 9025 case Intrinsic::riscv_sf_vc_xvv_se: 9026 case Intrinsic::riscv_sf_vc_ivv_se: 9027 case Intrinsic::riscv_sf_vc_vvv_se: 9028 case Intrinsic::riscv_sf_vc_fvv_se: 9029 case Intrinsic::riscv_sf_vc_xvw_se: 9030 case Intrinsic::riscv_sf_vc_ivw_se: 9031 case Intrinsic::riscv_sf_vc_vvw_se: 9032 case Intrinsic::riscv_sf_vc_fvw_se: { 9033 SmallVector<SDValue> Ops; 9034 getVCIXOperands(Op, DAG, Ops); 9035 9036 SDValue NewNode = 9037 DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops); 9038 9039 if (Op == NewNode) 9040 break; 9041 9042 return NewNode; 9043 } 9044 } 9045 9046 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 9047} 9048 9049static unsigned getRVVReductionOp(unsigned ISDOpcode) { 9050 switch (ISDOpcode) { 9051 default: 9052 llvm_unreachable("Unhandled reduction"); 9053 case ISD::VP_REDUCE_ADD: 9054 case ISD::VECREDUCE_ADD: 9055 return RISCVISD::VECREDUCE_ADD_VL; 9056 case ISD::VP_REDUCE_UMAX: 9057 case ISD::VECREDUCE_UMAX: 9058 return RISCVISD::VECREDUCE_UMAX_VL; 9059 case ISD::VP_REDUCE_SMAX: 9060 case ISD::VECREDUCE_SMAX: 9061 return RISCVISD::VECREDUCE_SMAX_VL; 9062 case ISD::VP_REDUCE_UMIN: 9063 case ISD::VECREDUCE_UMIN: 9064 return RISCVISD::VECREDUCE_UMIN_VL; 9065 case ISD::VP_REDUCE_SMIN: 9066 case ISD::VECREDUCE_SMIN: 9067 return RISCVISD::VECREDUCE_SMIN_VL; 9068 case ISD::VP_REDUCE_AND: 9069 case ISD::VECREDUCE_AND: 9070 return RISCVISD::VECREDUCE_AND_VL; 9071 case ISD::VP_REDUCE_OR: 9072 case ISD::VECREDUCE_OR: 9073 return RISCVISD::VECREDUCE_OR_VL; 9074 case ISD::VP_REDUCE_XOR: 9075 case ISD::VECREDUCE_XOR: 9076 return RISCVISD::VECREDUCE_XOR_VL; 9077 case ISD::VP_REDUCE_FADD: 9078 return RISCVISD::VECREDUCE_FADD_VL; 9079 case ISD::VP_REDUCE_SEQ_FADD: 9080 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 9081 case ISD::VP_REDUCE_FMAX: 9082 return RISCVISD::VECREDUCE_FMAX_VL; 9083 case ISD::VP_REDUCE_FMIN: 9084 return RISCVISD::VECREDUCE_FMIN_VL; 9085 } 9086 9087} 9088 9089SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 9090 SelectionDAG &DAG, 9091 bool IsVP) const { 9092 SDLoc DL(Op); 9093 SDValue Vec = Op.getOperand(IsVP ? 
1 : 0); 9094 MVT VecVT = Vec.getSimpleValueType(); 9095 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 9096 Op.getOpcode() == ISD::VECREDUCE_OR || 9097 Op.getOpcode() == ISD::VECREDUCE_XOR || 9098 Op.getOpcode() == ISD::VP_REDUCE_AND || 9099 Op.getOpcode() == ISD::VP_REDUCE_OR || 9100 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 9101 "Unexpected reduction lowering"); 9102 9103 MVT XLenVT = Subtarget.getXLenVT(); 9104 9105 MVT ContainerVT = VecVT; 9106 if (VecVT.isFixedLengthVector()) { 9107 ContainerVT = getContainerForFixedLengthVector(VecVT); 9108 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9109 } 9110 9111 SDValue Mask, VL; 9112 if (IsVP) { 9113 Mask = Op.getOperand(2); 9114 VL = Op.getOperand(3); 9115 } else { 9116 std::tie(Mask, VL) = 9117 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9118 } 9119 9120 unsigned BaseOpc; 9121 ISD::CondCode CC; 9122 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 9123 9124 switch (Op.getOpcode()) { 9125 default: 9126 llvm_unreachable("Unhandled reduction"); 9127 case ISD::VECREDUCE_AND: 9128 case ISD::VP_REDUCE_AND: { 9129 // vcpop ~x == 0 9130 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 9131 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 9132 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9133 CC = ISD::SETEQ; 9134 BaseOpc = ISD::AND; 9135 break; 9136 } 9137 case ISD::VECREDUCE_OR: 9138 case ISD::VP_REDUCE_OR: 9139 // vcpop x != 0 9140 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9141 CC = ISD::SETNE; 9142 BaseOpc = ISD::OR; 9143 break; 9144 case ISD::VECREDUCE_XOR: 9145 case ISD::VP_REDUCE_XOR: { 9146 // ((vcpop x) & 1) != 0 9147 SDValue One = DAG.getConstant(1, DL, XLenVT); 9148 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 9149 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 9150 CC = ISD::SETNE; 9151 BaseOpc = ISD::XOR; 9152 break; 9153 } 9154 } 9155 9156 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 9157 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC); 9158 9159 if (!IsVP) 9160 return SetCC; 9161 9162 // Now include the start value in the operation. 9163 // Note that we must return the start value when no elements are operated 9164 // upon. The vcpop instructions we've emitted in each case above will return 9165 // 0 for an inactive vector, and so we've already received the neutral value: 9166 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 9167 // can simply include the start value. 
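  // For illustration only (register choices and the final scalar compares are
  // assumptions; later folds may simplify them), the non-VP forms are expected
  // to select to short vcpop.m sequences, roughly:
  //   vecreduce.or  v0  ->  vcpop.m a0, v0 ; snez a0, a0
  //   vecreduce.xor v0  ->  vcpop.m a0, v0 ; andi a0, a0, 1
  //   vecreduce.and v0  ->  vmnot.m v8, v0 ; vcpop.m a0, v8 ; seqz a0, a0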
9168 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0)); 9169} 9170 9171static bool isNonZeroAVL(SDValue AVL) { 9172 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); 9173 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); 9174 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || 9175 (ImmAVL && ImmAVL->getZExtValue() >= 1); 9176} 9177 9178/// Helper to lower a reduction sequence of the form: 9179/// scalar = reduce_op vec, scalar_start 9180static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, 9181 SDValue StartValue, SDValue Vec, SDValue Mask, 9182 SDValue VL, const SDLoc &DL, SelectionDAG &DAG, 9183 const RISCVSubtarget &Subtarget) { 9184 const MVT VecVT = Vec.getSimpleValueType(); 9185 const MVT M1VT = getLMUL1VT(VecVT); 9186 const MVT XLenVT = Subtarget.getXLenVT(); 9187 const bool NonZeroAVL = isNonZeroAVL(VL); 9188 9189 // The reduction needs an LMUL1 input; do the splat at either LMUL1 9190 // or the original VT if fractional. 9191 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; 9192 // We reuse the VL of the reduction to reduce vsetvli toggles if we can 9193 // prove it is non-zero. For the AVL=0 case, we need the scalar to 9194 // be the result of the reduction operation. 9195 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); 9196 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, 9197 DAG, Subtarget); 9198 if (M1VT != InnerVT) 9199 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, 9200 DAG.getUNDEF(M1VT), 9201 InitialValue, DAG.getConstant(0, DL, XLenVT)); 9202 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; 9203 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 9204 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; 9205 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops); 9206 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 9207 DAG.getConstant(0, DL, XLenVT)); 9208} 9209 9210SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 9211 SelectionDAG &DAG) const { 9212 SDLoc DL(Op); 9213 SDValue Vec = Op.getOperand(0); 9214 EVT VecEVT = Vec.getValueType(); 9215 9216 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 9217 9218 // Due to ordering in legalize types we may have a vector type that needs to 9219 // be split. Do that manually so we can get down to a legal type. 9220 while (getTypeAction(*DAG.getContext(), VecEVT) == 9221 TargetLowering::TypeSplitVector) { 9222 auto [Lo, Hi] = DAG.SplitVector(Vec, DL); 9223 VecEVT = Lo.getValueType(); 9224 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 9225 } 9226 9227 // TODO: The type may need to be widened rather than split. Or widened before 9228 // it can be split. 
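  // As a rough sketch (registers, LMUL and the vsetvli are illustrative, not
  // the literal selection), a legal vecreduce.add on a v8i32 source ends up
  // as:
  //   vsetivli   zero, 8, e32, m1, ta, ma
  //   vmv.s.x    v9, zero          # neutral start value into element 0
  //   vredsum.vs v9, v8, v9
  //   vmv.x.s    a0, v9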
9229 if (!isTypeLegal(VecEVT)) 9230 return SDValue(); 9231 9232 MVT VecVT = VecEVT.getSimpleVT(); 9233 MVT VecEltVT = VecVT.getVectorElementType(); 9234 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 9235 9236 MVT ContainerVT = VecVT; 9237 if (VecVT.isFixedLengthVector()) { 9238 ContainerVT = getContainerForFixedLengthVector(VecVT); 9239 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9240 } 9241 9242 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9243 9244 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 9245 switch (BaseOpc) { 9246 case ISD::AND: 9247 case ISD::OR: 9248 case ISD::UMAX: 9249 case ISD::UMIN: 9250 case ISD::SMAX: 9251 case ISD::SMIN: 9252 MVT XLenVT = Subtarget.getXLenVT(); 9253 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, 9254 DAG.getConstant(0, DL, XLenVT)); 9255 } 9256 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec, 9257 Mask, VL, DL, DAG, Subtarget); 9258} 9259 9260// Given a reduction op, this function returns the matching reduction opcode, 9261// the vector SDValue and the scalar SDValue required to lower this to a 9262// RISCVISD node. 9263static std::tuple<unsigned, SDValue, SDValue> 9264getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, 9265 const RISCVSubtarget &Subtarget) { 9266 SDLoc DL(Op); 9267 auto Flags = Op->getFlags(); 9268 unsigned Opcode = Op.getOpcode(); 9269 switch (Opcode) { 9270 default: 9271 llvm_unreachable("Unhandled reduction"); 9272 case ISD::VECREDUCE_FADD: { 9273 // Use positive zero if we can. It is cheaper to materialize. 9274 SDValue Zero = 9275 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); 9276 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); 9277 } 9278 case ISD::VECREDUCE_SEQ_FADD: 9279 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 9280 Op.getOperand(0)); 9281 case ISD::VECREDUCE_FMIN: 9282 case ISD::VECREDUCE_FMAX: { 9283 MVT XLenVT = Subtarget.getXLenVT(); 9284 SDValue Front = 9285 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0), 9286 DAG.getConstant(0, DL, XLenVT)); 9287 unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN) 9288 ? RISCVISD::VECREDUCE_FMIN_VL 9289 : RISCVISD::VECREDUCE_FMAX_VL; 9290 return std::make_tuple(RVVOpc, Op.getOperand(0), Front); 9291 } 9292 } 9293} 9294 9295SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 9296 SelectionDAG &DAG) const { 9297 SDLoc DL(Op); 9298 MVT VecEltVT = Op.getSimpleValueType(); 9299 9300 unsigned RVVOpcode; 9301 SDValue VectorVal, ScalarVal; 9302 std::tie(RVVOpcode, VectorVal, ScalarVal) = 9303 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget); 9304 MVT VecVT = VectorVal.getSimpleValueType(); 9305 9306 MVT ContainerVT = VecVT; 9307 if (VecVT.isFixedLengthVector()) { 9308 ContainerVT = getContainerForFixedLengthVector(VecVT); 9309 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 9310 } 9311 9312 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 9313 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal, 9314 VectorVal, Mask, VL, DL, DAG, Subtarget); 9315} 9316 9317SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 9318 SelectionDAG &DAG) const { 9319 SDLoc DL(Op); 9320 SDValue Vec = Op.getOperand(1); 9321 EVT VecEVT = Vec.getValueType(); 9322 9323 // TODO: The type may need to be widened rather than split. Or widened before 9324 // it can be split. 
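  // Sketch of the intent for the VP form (names and registers are
  // illustrative):
  //   %r = call i32 @llvm.vp.reduce.add(i32 %start, <8 x i32> %v,
  //                                     <8 x i1> %m, i32 %evl)
  // uses %evl as the AVL of the reduction and applies %m directly:
  //   vsetvli    zero, a1, e32, m1, ta, ma   # a1 = %evl
  //   vmv.s.x    v9, a0                      # a0 = %start
  //   vredsum.vs v9, v8, v9, v0.t            # v0 = %m
  //   vmv.x.s    a0, v9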
9325 if (!isTypeLegal(VecEVT)) 9326 return SDValue(); 9327 9328 MVT VecVT = VecEVT.getSimpleVT(); 9329 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 9330 9331 if (VecVT.isFixedLengthVector()) { 9332 auto ContainerVT = getContainerForFixedLengthVector(VecVT); 9333 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9334 } 9335 9336 SDValue VL = Op.getOperand(3); 9337 SDValue Mask = Op.getOperand(2); 9338 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0), 9339 Vec, Mask, VL, DL, DAG, Subtarget); 9340} 9341 9342SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 9343 SelectionDAG &DAG) const { 9344 SDValue Vec = Op.getOperand(0); 9345 SDValue SubVec = Op.getOperand(1); 9346 MVT VecVT = Vec.getSimpleValueType(); 9347 MVT SubVecVT = SubVec.getSimpleValueType(); 9348 9349 SDLoc DL(Op); 9350 MVT XLenVT = Subtarget.getXLenVT(); 9351 unsigned OrigIdx = Op.getConstantOperandVal(2); 9352 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 9353 9354 // We don't have the ability to slide mask vectors up indexed by their i1 9355 // elements; the smallest we can do is i8. Often we are able to bitcast to 9356 // equivalent i8 vectors. Note that when inserting a fixed-length vector 9357 // into a scalable one, we might not necessarily have enough scalable 9358 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 9359 if (SubVecVT.getVectorElementType() == MVT::i1 && 9360 (OrigIdx != 0 || !Vec.isUndef())) { 9361 if (VecVT.getVectorMinNumElements() >= 8 && 9362 SubVecVT.getVectorMinNumElements() >= 8) { 9363 assert(OrigIdx % 8 == 0 && "Invalid index"); 9364 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 9365 SubVecVT.getVectorMinNumElements() % 8 == 0 && 9366 "Unexpected mask vector lowering"); 9367 OrigIdx /= 8; 9368 SubVecVT = 9369 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 9370 SubVecVT.isScalableVector()); 9371 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 9372 VecVT.isScalableVector()); 9373 Vec = DAG.getBitcast(VecVT, Vec); 9374 SubVec = DAG.getBitcast(SubVecVT, SubVec); 9375 } else { 9376 // We can't slide this mask vector up indexed by its i1 elements. 9377 // This poses a problem when we wish to insert a scalable vector which 9378 // can't be re-expressed as a larger type. Just choose the slow path and 9379 // extend to a larger type, then truncate back down. 9380 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 9381 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 9382 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 9383 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 9384 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 9385 Op.getOperand(2)); 9386 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 9387 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 9388 } 9389 } 9390 9391 // If the subvector vector is a fixed-length type, we cannot use subregister 9392 // manipulation to simplify the codegen; we don't know which register of a 9393 // LMUL group contains the specific subvector as we only know the minimum 9394 // register size. Therefore we must slide the vector group up the full 9395 // amount. 
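  // For example (a sketch; registers arbitrary and the vsetvli for the
  // container type is omitted), inserting a v2i32 subvector at index 2 of a
  // v8i32 vector becomes a tail-undisturbed slideup with VL = OrigIdx + 2 = 4:
  //   vslideup.vi v8, v9, 2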
9396 if (SubVecVT.isFixedLengthVector()) { 9397 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) 9398 return Op; 9399 MVT ContainerVT = VecVT; 9400 if (VecVT.isFixedLengthVector()) { 9401 ContainerVT = getContainerForFixedLengthVector(VecVT); 9402 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9403 } 9404 9405 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { 9406 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 9407 DAG.getUNDEF(ContainerVT), SubVec, 9408 DAG.getConstant(0, DL, XLenVT)); 9409 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 9410 return DAG.getBitcast(Op.getValueType(), SubVec); 9411 } 9412 9413 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 9414 DAG.getUNDEF(ContainerVT), SubVec, 9415 DAG.getConstant(0, DL, XLenVT)); 9416 SDValue Mask = 9417 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 9418 // Set the vector length to only the number of elements we care about. Note 9419 // that for slideup this includes the offset. 9420 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); 9421 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget); 9422 9423 // Use tail agnostic policy if we're inserting over Vec's tail. 9424 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 9425 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) 9426 Policy = RISCVII::TAIL_AGNOSTIC; 9427 9428 // If we're inserting into the lowest elements, use a tail undisturbed 9429 // vmv.v.v. 9430 if (OrigIdx == 0) { 9431 SubVec = 9432 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL); 9433 } else { 9434 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 9435 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, 9436 SlideupAmt, Mask, VL, Policy); 9437 } 9438 9439 if (VecVT.isFixedLengthVector()) 9440 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); 9441 return DAG.getBitcast(Op.getValueType(), SubVec); 9442 } 9443 9444 unsigned SubRegIdx, RemIdx; 9445 std::tie(SubRegIdx, RemIdx) = 9446 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 9447 VecVT, SubVecVT, OrigIdx, TRI); 9448 9449 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 9450 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 9451 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 9452 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 9453 9454 // 1. If the Idx has been completely eliminated and this subvector's size is 9455 // a vector register or a multiple thereof, or the surrounding elements are 9456 // undef, then this is a subvector insert which naturally aligns to a vector 9457 // register. These can easily be handled using subregister manipulation. 9458 // 2. If the subvector is smaller than a vector register, then the insertion 9459 // must preserve the undisturbed elements of the register. We do this by 9460 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 9461 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 9462 // subvector within the vector register, and an INSERT_SUBVECTOR of that 9463 // LMUL=1 type back into the larger vector (resolving to another subregister 9464 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 9465 // to avoid allocating a large register group to hold our subvector. 
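  // A sketch of the non-aligned scalable case below (register numbers and the
  // exact VL are assumptions): extract the containing LMUL=1 register (a
  // subregister copy), slide the subvector up within it, then insert the
  // LMUL=1 register back:
  //   vsetvli     zero, a2, e32, m1, tu, ma  # VL = slideup amount + VLMAX of
  //                                          # the subvector type
  //   vslideup.vx v12, v16, a1               # a1 = RemIdx scaled by vscale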
9466 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 9467 return Op; 9468 9469 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 9470 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 9471 // (in our case undisturbed). This means we can set up a subvector insertion 9472 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 9473 // size of the subvector. 9474 MVT InterSubVT = VecVT; 9475 SDValue AlignedExtract = Vec; 9476 unsigned AlignedIdx = OrigIdx - RemIdx; 9477 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 9478 InterSubVT = getLMUL1VT(VecVT); 9479 // Extract a subvector equal to the nearest full vector register type. This 9480 // should resolve to a EXTRACT_SUBREG instruction. 9481 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 9482 DAG.getConstant(AlignedIdx, DL, XLenVT)); 9483 } 9484 9485 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 9486 DAG.getUNDEF(InterSubVT), SubVec, 9487 DAG.getConstant(0, DL, XLenVT)); 9488 9489 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 9490 9491 VL = computeVLMax(SubVecVT, DL, DAG); 9492 9493 // If we're inserting into the lowest elements, use a tail undisturbed 9494 // vmv.v.v. 9495 if (RemIdx == 0) { 9496 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract, 9497 SubVec, VL); 9498 } else { 9499 SDValue SlideupAmt = 9500 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 9501 9502 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 9503 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 9504 9505 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec, 9506 SlideupAmt, Mask, VL); 9507 } 9508 9509 // If required, insert this subvector back into the correct vector register. 9510 // This should resolve to an INSERT_SUBREG instruction. 9511 if (VecVT.bitsGT(InterSubVT)) 9512 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec, 9513 DAG.getConstant(AlignedIdx, DL, XLenVT)); 9514 9515 // We might have bitcast from a mask type: cast back to the original type if 9516 // required. 9517 return DAG.getBitcast(Op.getSimpleValueType(), SubVec); 9518} 9519 9520SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 9521 SelectionDAG &DAG) const { 9522 SDValue Vec = Op.getOperand(0); 9523 MVT SubVecVT = Op.getSimpleValueType(); 9524 MVT VecVT = Vec.getSimpleValueType(); 9525 9526 SDLoc DL(Op); 9527 MVT XLenVT = Subtarget.getXLenVT(); 9528 unsigned OrigIdx = Op.getConstantOperandVal(1); 9529 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 9530 9531 // We don't have the ability to slide mask vectors down indexed by their i1 9532 // elements; the smallest we can do is i8. Often we are able to bitcast to 9533 // equivalent i8 vectors. Note that when extracting a fixed-length vector 9534 // from a scalable one, we might not necessarily have enough scalable 9535 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 
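  // E.g. (a sketch of the bitcast path below):
  //   nxv8i1 = extract_subvector nxv64i1 %src, 8
  // can be re-expressed on whole bytes as
  //   nxv1i8 = extract_subvector (nxv8i8 bitcast %src), 1
  // with the result bitcast back to nxv8i1 at the end.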
9536 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 9537 if (VecVT.getVectorMinNumElements() >= 8 && 9538 SubVecVT.getVectorMinNumElements() >= 8) { 9539 assert(OrigIdx % 8 == 0 && "Invalid index"); 9540 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 9541 SubVecVT.getVectorMinNumElements() % 8 == 0 && 9542 "Unexpected mask vector lowering"); 9543 OrigIdx /= 8; 9544 SubVecVT = 9545 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 9546 SubVecVT.isScalableVector()); 9547 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 9548 VecVT.isScalableVector()); 9549 Vec = DAG.getBitcast(VecVT, Vec); 9550 } else { 9551 // We can't slide this mask vector down, indexed by its i1 elements. 9552 // This poses a problem when we wish to extract a scalable vector which 9553 // can't be re-expressed as a larger type. Just choose the slow path and 9554 // extend to a larger type, then truncate back down. 9555 // TODO: We could probably improve this when extracting certain fixed 9556 // from fixed, where we can extract as i8 and shift the correct element 9557 // right to reach the desired subvector? 9558 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 9559 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 9560 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 9561 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 9562 Op.getOperand(1)); 9563 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 9564 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 9565 } 9566 } 9567 9568 // With an index of 0 this is a cast-like subvector, which can be performed 9569 // with subregister operations. 9570 if (OrigIdx == 0) 9571 return Op; 9572 9573 // If the subvector vector is a fixed-length type, we cannot use subregister 9574 // manipulation to simplify the codegen; we don't know which register of a 9575 // LMUL group contains the specific subvector as we only know the minimum 9576 // register size. Therefore we must slide the vector group down the full 9577 // amount. 9578 if (SubVecVT.isFixedLengthVector()) { 9579 MVT ContainerVT = VecVT; 9580 if (VecVT.isFixedLengthVector()) { 9581 ContainerVT = getContainerForFixedLengthVector(VecVT); 9582 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 9583 } 9584 9585 // Shrink down Vec so we're performing the slidedown on a smaller LMUL. 9586 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; 9587 if (auto ShrunkVT = 9588 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) { 9589 ContainerVT = *ShrunkVT; 9590 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, 9591 DAG.getVectorIdxConstant(0, DL)); 9592 } 9593 9594 SDValue Mask = 9595 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 9596 // Set the vector length to only the number of elements we care about. This 9597 // avoids sliding down elements we're going to discard straight away. 9598 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, 9599 Subtarget); 9600 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 9601 SDValue Slidedown = 9602 getVSlidedown(DAG, Subtarget, DL, ContainerVT, 9603 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 9604 // Now we can use a cast-like subvector extract to get the result. 
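  // Putting it together, a sketch of extracting a v2i32 subvector at index 2
  // from a v8i32 source (registers arbitrary; the preceding vsetvli encodes
  // VL = 2 for the possibly shrunken container):
  //   vslidedown.vi v8, v8, 2
  //   # the low 2 elements of v8 now form the result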
9605 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 9606 DAG.getConstant(0, DL, XLenVT)); 9607 return DAG.getBitcast(Op.getValueType(), Slidedown); 9608 } 9609 9610 unsigned SubRegIdx, RemIdx; 9611 std::tie(SubRegIdx, RemIdx) = 9612 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 9613 VecVT, SubVecVT, OrigIdx, TRI); 9614 9615 // If the Idx has been completely eliminated then this is a subvector extract 9616 // which naturally aligns to a vector register. These can easily be handled 9617 // using subregister manipulation. 9618 if (RemIdx == 0) 9619 return Op; 9620 9621 // Else SubVecVT is a fractional LMUL and may need to be slid down. 9622 assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); 9623 9624 // If the vector type is an LMUL-group type, extract a subvector equal to the 9625 // nearest full vector register type. 9626 MVT InterSubVT = VecVT; 9627 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 9628 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and 9629 // we should have successfully decomposed the extract into a subregister. 9630 assert(SubRegIdx != RISCV::NoSubRegister); 9631 InterSubVT = getLMUL1VT(VecVT); 9632 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); 9633 } 9634 9635 // Slide this vector register down by the desired number of elements in order 9636 // to place the desired subvector starting at element 0. 9637 SDValue SlidedownAmt = 9638 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); 9639 9640 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 9641 SDValue Slidedown = 9642 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), 9643 Vec, SlidedownAmt, Mask, VL); 9644 9645 // Now the vector is in the right position, extract our final subvector. This 9646 // should resolve to a COPY. 9647 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 9648 DAG.getConstant(0, DL, XLenVT)); 9649 9650 // We might have bitcast from a mask type: cast back to the original type if 9651 // required. 9652 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 9653} 9654 9655// Widen a vector's operands to i8, then truncate its results back to the 9656// original type, typically i1. All operand and result types must be the same. 
9657static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, 9658 SelectionDAG &DAG) { 9659 MVT VT = N.getSimpleValueType(); 9660 MVT WideVT = VT.changeVectorElementType(MVT::i8); 9661 SmallVector<SDValue, 4> WideOps; 9662 for (SDValue Op : N->ops()) { 9663 assert(Op.getSimpleValueType() == VT && 9664 "Operands and result must be same type"); 9665 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op)); 9666 } 9667 9668 unsigned NumVals = N->getNumValues(); 9669 9670 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( 9671 NumVals, N.getValueType().changeVectorElementType(MVT::i8))); 9672 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps); 9673 SmallVector<SDValue, 4> TruncVals; 9674 for (unsigned I = 0; I < NumVals; I++) { 9675 TruncVals.push_back( 9676 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I), 9677 DAG.getConstant(0, DL, WideVT), ISD::SETNE)); 9678 } 9679 9680 if (TruncVals.size() > 1) 9681 return DAG.getMergeValues(TruncVals, DL); 9682 return TruncVals.front(); 9683} 9684 9685SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, 9686 SelectionDAG &DAG) const { 9687 SDLoc DL(Op); 9688 MVT VecVT = Op.getSimpleValueType(); 9689 MVT XLenVT = Subtarget.getXLenVT(); 9690 9691 assert(VecVT.isScalableVector() && 9692 "vector_interleave on non-scalable vector!"); 9693 9694 // 1 bit element vectors need to be widened to e8 9695 if (VecVT.getVectorElementType() == MVT::i1) 9696 return widenVectorOpsToi8(Op, DL, DAG); 9697 9698 // If the VT is LMUL=8, we need to split and reassemble. 9699 if (VecVT.getSizeInBits().getKnownMinValue() == 9700 (8 * RISCV::RVVBitsPerBlock)) { 9701 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 9702 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 9703 EVT SplitVT = Op0Lo.getValueType(); 9704 9705 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 9706 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi); 9707 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 9708 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi); 9709 9710 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9711 ResLo.getValue(0), ResHi.getValue(0)); 9712 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1), 9713 ResHi.getValue(1)); 9714 return DAG.getMergeValues({Even, Odd}, DL); 9715 } 9716 9717 // Concatenate the two vectors as one vector to deinterleave 9718 MVT ConcatVT = 9719 MVT::getVectorVT(VecVT.getVectorElementType(), 9720 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 9721 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 9722 Op.getOperand(0), Op.getOperand(1)); 9723 9724 // We want to operate on all lanes, so get the mask and VL and mask for it 9725 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); 9726 SDValue Passthru = DAG.getUNDEF(ConcatVT); 9727 9728 // We can deinterleave through vnsrl.wi if the element type is smaller than 9729 // ELEN 9730 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { 9731 SDValue Even = 9732 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); 9733 SDValue Odd = 9734 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG); 9735 return DAG.getMergeValues({Even, Odd}, DL); 9736 } 9737 9738 // For the indices, use the same SEW to avoid an extra vsetvli 9739 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); 9740 // Create a vector of even indices {0, 2, 4, ...} 9741 SDValue EvenIdx = 9742 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); 9743 // 
Create a vector of odd indices {1, 3, 5, ... } 9744 SDValue OddIdx = 9745 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); 9746 9747 // Gather the even and odd elements into two separate vectors 9748 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 9749 Concat, EvenIdx, Passthru, Mask, VL); 9750 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, 9751 Concat, OddIdx, Passthru, Mask, VL); 9752 9753 // Extract the result half of the gather for even and odd 9754 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, 9755 DAG.getConstant(0, DL, XLenVT)); 9756 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide, 9757 DAG.getConstant(0, DL, XLenVT)); 9758 9759 return DAG.getMergeValues({Even, Odd}, DL); 9760} 9761 9762SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, 9763 SelectionDAG &DAG) const { 9764 SDLoc DL(Op); 9765 MVT VecVT = Op.getSimpleValueType(); 9766 9767 assert(VecVT.isScalableVector() && 9768 "vector_interleave on non-scalable vector!"); 9769 9770 // i1 vectors need to be widened to i8 9771 if (VecVT.getVectorElementType() == MVT::i1) 9772 return widenVectorOpsToi8(Op, DL, DAG); 9773 9774 MVT XLenVT = Subtarget.getXLenVT(); 9775 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); 9776 9777 // If the VT is LMUL=8, we need to split and reassemble. 9778 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) { 9779 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 9780 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); 9781 EVT SplitVT = Op0Lo.getValueType(); 9782 9783 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 9784 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo); 9785 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 9786 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi); 9787 9788 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9789 ResLo.getValue(0), ResLo.getValue(1)); 9790 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, 9791 ResHi.getValue(0), ResHi.getValue(1)); 9792 return DAG.getMergeValues({Lo, Hi}, DL); 9793 } 9794 9795 SDValue Interleaved; 9796 9797 // If the element type is smaller than ELEN, then we can interleave with 9798 // vwaddu.vv and vwmaccu.vx 9799 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { 9800 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL, 9801 DAG, Subtarget); 9802 } else { 9803 // Otherwise, fall back to using vrgatherei16.vv 9804 MVT ConcatVT = 9805 MVT::getVectorVT(VecVT.getVectorElementType(), 9806 VecVT.getVectorElementCount().multiplyCoefficientBy(2)); 9807 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, 9808 Op.getOperand(0), Op.getOperand(1)); 9809 9810 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16); 9811 9812 // 0 1 2 3 4 5 6 7 ... 9813 SDValue StepVec = DAG.getStepVector(DL, IdxVT); 9814 9815 // 1 1 1 1 1 1 1 1 ... 9816 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT)); 9817 9818 // 1 0 1 0 1 0 1 0 ... 9819 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones); 9820 OddMask = DAG.getSetCC( 9821 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask, 9822 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)), 9823 ISD::CondCode::SETNE); 9824 9825 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG)); 9826 9827 // Build up the index vector for interleaving the concatenated vector 9828 // 0 0 1 1 2 2 3 3 ...
9829 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones); 9830 // 0 n 1 n+1 2 n+2 3 n+3 ... 9831 Idx = 9832 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL); 9833 9834 // Then perform the interleave 9835 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ... 9836 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG); 9837 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT, 9838 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL); 9839 } 9840 9841 // Extract the two halves from the interleaved result 9842 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, 9843 DAG.getVectorIdxConstant(0, DL)); 9844 SDValue Hi = DAG.getNode( 9845 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, 9846 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL)); 9847 9848 return DAG.getMergeValues({Lo, Hi}, DL); 9849} 9850 9851// Lower step_vector to the vid instruction. Any non-identity step value must 9852// be accounted for by manual expansion. 9853SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, 9854 SelectionDAG &DAG) const { 9855 SDLoc DL(Op); 9856 MVT VT = Op.getSimpleValueType(); 9857 assert(VT.isScalableVector() && "Expected scalable vector"); 9858 MVT XLenVT = Subtarget.getXLenVT(); 9859 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 9860 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 9861 uint64_t StepValImm = Op.getConstantOperandVal(0); 9862 if (StepValImm != 1) { 9863 if (isPowerOf2_64(StepValImm)) { 9864 SDValue StepVal = 9865 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 9866 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL); 9867 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); 9868 } else { 9869 SDValue StepVal = lowerScalarSplat( 9870 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), 9871 VL, VT, DL, DAG, Subtarget); 9872 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); 9873 } 9874 } 9875 return StepVec; 9876} 9877 9878// Implement vector_reverse using vrgather.vv with indices determined by 9879// subtracting the id of each element from (VLMAX-1). This will convert 9880// the indices like so: 9881// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 9882// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 9883SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, 9884 SelectionDAG &DAG) const { 9885 SDLoc DL(Op); 9886 MVT VecVT = Op.getSimpleValueType(); 9887 if (VecVT.getVectorElementType() == MVT::i1) { 9888 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 9889 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0)); 9890 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1); 9891 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2); 9892 } 9893 unsigned EltSize = VecVT.getScalarSizeInBits(); 9894 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 9895 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 9896 unsigned MaxVLMAX = 9897 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 9898 9899 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 9900 MVT IntVT = VecVT.changeVectorElementTypeToInteger(); 9901 9902 // If this is SEW=8 and VLMAX is potentially more than 256, we need 9903 // to use vrgatherei16.vv. 9904 // TODO: It's also possible to use vrgatherei16.vv for other types to 9905 // decrease register width for the index calculation.
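  // As a sketch of the common case below, the indices come from vid.v and the
  // splat-and-subtract, which typically folds to a vrsub (registers
  // arbitrary):
  //   vid.v       v12
  //   vrsub.vx    v12, v12, a0      # a0 = VLMAX-1, so idx = (VLMAX-1) - vid
  //   vrgather.vv v8, v16, v12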
9906 if (MaxVLMAX > 256 && EltSize == 8) { 9907 // If this is LMUL=8, we have to split before can use vrgatherei16.vv. 9908 // Reverse each half, then reassemble them in reverse order. 9909 // NOTE: It's also possible that after splitting that VLMAX no longer 9910 // requires vrgatherei16.vv. 9911 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 9912 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); 9913 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT); 9914 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 9915 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 9916 // Reassemble the low and high pieces reversed. 9917 // FIXME: This is a CONCAT_VECTORS. 9918 SDValue Res = 9919 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 9920 DAG.getIntPtrConstant(0, DL)); 9921 return DAG.getNode( 9922 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 9923 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 9924 } 9925 9926 // Just promote the int type to i16 which will double the LMUL. 9927 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 9928 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 9929 } 9930 9931 MVT XLenVT = Subtarget.getXLenVT(); 9932 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 9933 9934 // Calculate VLMAX-1 for the desired SEW. 9935 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT, 9936 computeVLMax(VecVT, DL, DAG), 9937 DAG.getConstant(1, DL, XLenVT)); 9938 9939 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 9940 bool IsRV32E64 = 9941 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 9942 SDValue SplatVL; 9943 if (!IsRV32E64) 9944 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 9945 else 9946 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), 9947 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); 9948 9949 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 9950 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, 9951 DAG.getUNDEF(IntVT), Mask, VL); 9952 9953 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, 9954 DAG.getUNDEF(VecVT), Mask, VL); 9955} 9956 9957SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, 9958 SelectionDAG &DAG) const { 9959 SDLoc DL(Op); 9960 SDValue V1 = Op.getOperand(0); 9961 SDValue V2 = Op.getOperand(1); 9962 MVT XLenVT = Subtarget.getXLenVT(); 9963 MVT VecVT = Op.getSimpleValueType(); 9964 9965 SDValue VLMax = computeVLMax(VecVT, DL, DAG); 9966 9967 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 9968 SDValue DownOffset, UpOffset; 9969 if (ImmValue >= 0) { 9970 // The operand is a TargetConstant, we need to rebuild it as a regular 9971 // constant. 9972 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 9973 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); 9974 } else { 9975 // The operand is a TargetConstant, we need to rebuild it as a regular 9976 // constant rather than negating the original operand. 
9977 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 9978 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset); 9979 } 9980 9981 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG); 9982 9983 SDValue SlideDown = 9984 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, 9985 DownOffset, TrueMask, UpOffset); 9986 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset, 9987 TrueMask, DAG.getRegister(RISCV::X0, XLenVT), 9988 RISCVII::TAIL_AGNOSTIC); 9989} 9990 9991SDValue 9992RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 9993 SelectionDAG &DAG) const { 9994 SDLoc DL(Op); 9995 auto *Load = cast<LoadSDNode>(Op); 9996 9997 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 9998 Load->getMemoryVT(), 9999 *Load->getMemOperand()) && 10000 "Expecting a correctly-aligned load"); 10001 10002 MVT VT = Op.getSimpleValueType(); 10003 MVT XLenVT = Subtarget.getXLenVT(); 10004 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10005 10006 // If we know the exact VLEN and our fixed length vector completely fills 10007 // the container, use a whole register load instead. 10008 const auto [MinVLMAX, MaxVLMAX] = 10009 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 10010 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && 10011 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { 10012 SDValue NewLoad = 10013 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), 10014 Load->getMemOperand()); 10015 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 10016 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); 10017 } 10018 10019 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); 10020 10021 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 10022 SDValue IntID = DAG.getTargetConstant( 10023 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT); 10024 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; 10025 if (!IsMaskOp) 10026 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10027 Ops.push_back(Load->getBasePtr()); 10028 Ops.push_back(VL); 10029 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10030 SDValue NewLoad = 10031 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 10032 Load->getMemoryVT(), Load->getMemOperand()); 10033 10034 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 10035 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); 10036} 10037 10038SDValue 10039RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 10040 SelectionDAG &DAG) const { 10041 SDLoc DL(Op); 10042 auto *Store = cast<StoreSDNode>(Op); 10043 10044 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 10045 Store->getMemoryVT(), 10046 *Store->getMemOperand()) && 10047 "Expecting a correctly-aligned store"); 10048 10049 SDValue StoreVal = Store->getValue(); 10050 MVT VT = StoreVal.getSimpleValueType(); 10051 MVT XLenVT = Subtarget.getXLenVT(); 10052 10053 // If the size is less than a byte, we need to pad with zeros to make a byte.
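  // E.g. a v4i1 store is first widened by inserting the value into a zero
  // v8i1 vector, so the mask store (vsm.v) emitted below writes one full byte
  // (a sketch of the path below):
  //   store v4i1 %m  ->  vsm.v of (v8i1 insert_subvector zeroinitializer, %m, 0)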
10054 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { 10055 VT = MVT::v8i1; 10056 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 10057 DAG.getConstant(0, DL, VT), StoreVal, 10058 DAG.getIntPtrConstant(0, DL)); 10059 } 10060 10061 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10062 10063 SDValue NewValue = 10064 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 10065 10066 10067 // If we know the exact VLEN and our fixed length vector completely fills 10068 // the container, use a whole register store instead. 10069 const auto [MinVLMAX, MaxVLMAX] = 10070 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); 10071 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && 10072 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) 10073 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), 10074 Store->getMemOperand()); 10075 10076 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, 10077 Subtarget); 10078 10079 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 10080 SDValue IntID = DAG.getTargetConstant( 10081 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); 10082 return DAG.getMemIntrinsicNode( 10083 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), 10084 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, 10085 Store->getMemoryVT(), Store->getMemOperand()); 10086} 10087 10088SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, 10089 SelectionDAG &DAG) const { 10090 SDLoc DL(Op); 10091 MVT VT = Op.getSimpleValueType(); 10092 10093 const auto *MemSD = cast<MemSDNode>(Op); 10094 EVT MemVT = MemSD->getMemoryVT(); 10095 MachineMemOperand *MMO = MemSD->getMemOperand(); 10096 SDValue Chain = MemSD->getChain(); 10097 SDValue BasePtr = MemSD->getBasePtr(); 10098 10099 SDValue Mask, PassThru, VL; 10100 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { 10101 Mask = VPLoad->getMask(); 10102 PassThru = DAG.getUNDEF(VT); 10103 VL = VPLoad->getVectorLength(); 10104 } else { 10105 const auto *MLoad = cast<MaskedLoadSDNode>(Op); 10106 Mask = MLoad->getMask(); 10107 PassThru = MLoad->getPassThru(); 10108 } 10109 10110 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10111 10112 MVT XLenVT = Subtarget.getXLenVT(); 10113 10114 MVT ContainerVT = VT; 10115 if (VT.isFixedLengthVector()) { 10116 ContainerVT = getContainerForFixedLengthVector(VT); 10117 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 10118 if (!IsUnmasked) { 10119 MVT MaskVT = getMaskTypeFor(ContainerVT); 10120 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10121 } 10122 } 10123 10124 if (!VL) 10125 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10126 10127 unsigned IntID = 10128 IsUnmasked ? 
Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 10129 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 10130 if (IsUnmasked) 10131 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10132 else 10133 Ops.push_back(PassThru); 10134 Ops.push_back(BasePtr); 10135 if (!IsUnmasked) 10136 Ops.push_back(Mask); 10137 Ops.push_back(VL); 10138 if (!IsUnmasked) 10139 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 10140 10141 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10142 10143 SDValue Result = 10144 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 10145 Chain = Result.getValue(1); 10146 10147 if (VT.isFixedLengthVector()) 10148 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 10149 10150 return DAG.getMergeValues({Result, Chain}, DL); 10151} 10152 10153SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 10154 SelectionDAG &DAG) const { 10155 SDLoc DL(Op); 10156 10157 const auto *MemSD = cast<MemSDNode>(Op); 10158 EVT MemVT = MemSD->getMemoryVT(); 10159 MachineMemOperand *MMO = MemSD->getMemOperand(); 10160 SDValue Chain = MemSD->getChain(); 10161 SDValue BasePtr = MemSD->getBasePtr(); 10162 SDValue Val, Mask, VL; 10163 10164 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 10165 Val = VPStore->getValue(); 10166 Mask = VPStore->getMask(); 10167 VL = VPStore->getVectorLength(); 10168 } else { 10169 const auto *MStore = cast<MaskedStoreSDNode>(Op); 10170 Val = MStore->getValue(); 10171 Mask = MStore->getMask(); 10172 } 10173 10174 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 10175 10176 MVT VT = Val.getSimpleValueType(); 10177 MVT XLenVT = Subtarget.getXLenVT(); 10178 10179 MVT ContainerVT = VT; 10180 if (VT.isFixedLengthVector()) { 10181 ContainerVT = getContainerForFixedLengthVector(VT); 10182 10183 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 10184 if (!IsUnmasked) { 10185 MVT MaskVT = getMaskTypeFor(ContainerVT); 10186 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10187 } 10188 } 10189 10190 if (!VL) 10191 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10192 10193 unsigned IntID = 10194 IsUnmasked ? 
Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10195 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10196 Ops.push_back(Val);
10197 Ops.push_back(BasePtr);
10198 if (!IsUnmasked)
10199 Ops.push_back(Mask);
10200 Ops.push_back(VL);
10201
10202 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10203 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10204}
10205
10206SDValue
10207RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10208 SelectionDAG &DAG) const {
10209 MVT InVT = Op.getOperand(0).getSimpleValueType();
10210 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10211
10212 MVT VT = Op.getSimpleValueType();
10213
10214 SDValue Op1 =
10215 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10216 SDValue Op2 =
10217 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10218
10219 SDLoc DL(Op);
10220 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10221 DAG, Subtarget);
10222 MVT MaskVT = getMaskTypeFor(ContainerVT);
10223
10224 SDValue Cmp =
10225 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10226 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10227
10228 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10229}
10230
10231SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10232 SelectionDAG &DAG) const {
10233 unsigned Opc = Op.getOpcode();
10234 SDLoc DL(Op);
10235 SDValue Chain = Op.getOperand(0);
10236 SDValue Op1 = Op.getOperand(1);
10237 SDValue Op2 = Op.getOperand(2);
10238 SDValue CC = Op.getOperand(3);
10239 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10240 MVT VT = Op.getSimpleValueType();
10241 MVT InVT = Op1.getSimpleValueType();
10242
10243 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10244 // condition codes.
10245 if (Opc == ISD::STRICT_FSETCCS) {
10246 // Expand strict_fsetccs(x, y, oeq) to
10247 // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
10248 SDVTList VTList = Op->getVTList();
10249 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10250 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10251 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10252 Op2, OLECCVal);
10253 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10254 Op1, OLECCVal);
10255 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10256 Tmp1.getValue(1), Tmp2.getValue(1));
10257 // Tmp1 and Tmp2 might be the same node.
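      // For illustration (added note, not from the original comment): when
      // Op1 and Op2 are the same value, the two getNode calls above CSE to a
      // single node, so the AND below would be redundant; hence the check.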
10258 if (Tmp1 != Tmp2)
10259 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10260 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10261 }
10262
10263 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10264 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10265 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10266 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10267 Op2, OEQCCVal);
10268 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10269 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10270 }
10271 }
10272
10273 MVT ContainerInVT = InVT;
10274 if (InVT.isFixedLengthVector()) {
10275 ContainerInVT = getContainerForFixedLengthVector(InVT);
10276 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10277 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10278 }
10279 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10280
10281 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10282
10283 SDValue Res;
10284 if (Opc == ISD::STRICT_FSETCC &&
10285 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10286 CCVal == ISD::SETOLE)) {
10287 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN inputs. Generate a
10288 // mask that is only active when both input elements are ordered.
10289 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10290 SDValue OrderMask1 = DAG.getNode(
10291 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10292 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10293 True, VL});
10294 SDValue OrderMask2 = DAG.getNode(
10295 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10296 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10297 True, VL});
10298 Mask =
10299 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10300 // Use Mask as the merge operand to let the result be 0 if either of the
10301 // inputs is unordered.
10302 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10303 DAG.getVTList(MaskVT, MVT::Other),
10304 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10305 } else {
10306 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10307 : RISCVISD::STRICT_FSETCCS_VL;
10308 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10309 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10310 }
10311
10312 if (VT.isFixedLengthVector()) {
10313 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10314 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10315 }
10316 return Res;
10317}
10318
10319// Lower vector ABS to smax(X, sub(0, X)).
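// For example (illustrative element values, not from the original comment):
//   X         = <-3,  4, INT_MIN, 0>
//   sub(0, X) = < 3, -4, INT_MIN, 0>
//   smax(...) = < 3,  4, INT_MIN, 0>
// i.e. the element-wise absolute value, with INT_MIN wrapping to itself as is
// usual for ISD::ABS.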
10320SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 10321 SDLoc DL(Op); 10322 MVT VT = Op.getSimpleValueType(); 10323 SDValue X = Op.getOperand(0); 10324 10325 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && 10326 "Unexpected type for ISD::ABS"); 10327 10328 MVT ContainerVT = VT; 10329 if (VT.isFixedLengthVector()) { 10330 ContainerVT = getContainerForFixedLengthVector(VT); 10331 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 10332 } 10333 10334 SDValue Mask, VL; 10335 if (Op->getOpcode() == ISD::VP_ABS) { 10336 Mask = Op->getOperand(1); 10337 if (VT.isFixedLengthVector()) 10338 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, 10339 Subtarget); 10340 VL = Op->getOperand(2); 10341 } else 10342 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10343 10344 SDValue SplatZero = DAG.getNode( 10345 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 10346 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 10347 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, 10348 DAG.getUNDEF(ContainerVT), Mask, VL); 10349 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, 10350 DAG.getUNDEF(ContainerVT), Mask, VL); 10351 10352 if (VT.isFixedLengthVector()) 10353 Max = convertFromScalableVector(VT, Max, DAG, Subtarget); 10354 return Max; 10355} 10356 10357SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 10358 SDValue Op, SelectionDAG &DAG) const { 10359 SDLoc DL(Op); 10360 MVT VT = Op.getSimpleValueType(); 10361 SDValue Mag = Op.getOperand(0); 10362 SDValue Sign = Op.getOperand(1); 10363 assert(Mag.getValueType() == Sign.getValueType() && 10364 "Can only handle COPYSIGN with matching types."); 10365 10366 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10367 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 10368 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 10369 10370 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10371 10372 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, 10373 Sign, DAG.getUNDEF(ContainerVT), Mask, VL); 10374 10375 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 10376} 10377 10378SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 10379 SDValue Op, SelectionDAG &DAG) const { 10380 MVT VT = Op.getSimpleValueType(); 10381 MVT ContainerVT = getContainerForFixedLengthVector(VT); 10382 10383 MVT I1ContainerVT = 10384 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 10385 10386 SDValue CC = 10387 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 10388 SDValue Op1 = 10389 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 10390 SDValue Op2 = 10391 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 10392 10393 SDLoc DL(Op); 10394 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 10395 10396 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, 10397 Op2, DAG.getUNDEF(ContainerVT), VL); 10398 10399 return convertFromScalableVector(VT, Select, DAG, Subtarget); 10400} 10401 10402SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, 10403 SelectionDAG &DAG) const { 10404 unsigned NewOpc = getRISCVVLOp(Op); 10405 bool HasMergeOp = hasMergeOp(NewOpc); 10406 bool HasMask = hasMaskOp(NewOpc); 10407 10408 MVT VT = Op.getSimpleValueType(); 10409 MVT ContainerVT = 
getContainerForFixedLengthVector(VT); 10410 10411 // Create list of operands by converting existing ones to scalable types. 10412 SmallVector<SDValue, 6> Ops; 10413 for (const SDValue &V : Op->op_values()) { 10414 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 10415 10416 // Pass through non-vector operands. 10417 if (!V.getValueType().isVector()) { 10418 Ops.push_back(V); 10419 continue; 10420 } 10421 10422 // "cast" fixed length vector to a scalable vector. 10423 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 10424 "Only fixed length vectors are supported!"); 10425 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 10426 } 10427 10428 SDLoc DL(Op); 10429 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 10430 if (HasMergeOp) 10431 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10432 if (HasMask) 10433 Ops.push_back(Mask); 10434 Ops.push_back(VL); 10435 10436 // StrictFP operations have two result values. Their lowered result should 10437 // have same result count. 10438 if (Op->isStrictFPOpcode()) { 10439 SDValue ScalableRes = 10440 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, 10441 Op->getFlags()); 10442 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 10443 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL); 10444 } 10445 10446 SDValue ScalableRes = 10447 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); 10448 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 10449} 10450 10451// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 10452// * Operands of each node are assumed to be in the same order. 10453// * The EVL operand is promoted from i32 to i64 on RV64. 10454// * Fixed-length vectors are converted to their scalable-vector container 10455// types. 10456SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { 10457 unsigned RISCVISDOpc = getRISCVVLOp(Op); 10458 bool HasMergeOp = hasMergeOp(RISCVISDOpc); 10459 10460 SDLoc DL(Op); 10461 MVT VT = Op.getSimpleValueType(); 10462 SmallVector<SDValue, 4> Ops; 10463 10464 MVT ContainerVT = VT; 10465 if (VT.isFixedLengthVector()) 10466 ContainerVT = getContainerForFixedLengthVector(VT); 10467 10468 for (const auto &OpIdx : enumerate(Op->ops())) { 10469 SDValue V = OpIdx.value(); 10470 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 10471 // Add dummy merge value before the mask. Or if there isn't a mask, before 10472 // EVL. 10473 if (HasMergeOp) { 10474 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode()); 10475 if (MaskIdx) { 10476 if (*MaskIdx == OpIdx.index()) 10477 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10478 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == 10479 OpIdx.index()) { 10480 if (Op.getOpcode() == ISD::VP_MERGE) { 10481 // For VP_MERGE, copy the false operand instead of an undef value. 10482 Ops.push_back(Ops.back()); 10483 } else { 10484 assert(Op.getOpcode() == ISD::VP_SELECT); 10485 // For VP_SELECT, add an undef value. 10486 Ops.push_back(DAG.getUNDEF(ContainerVT)); 10487 } 10488 } 10489 } 10490 // Pass through operands which aren't fixed-length vectors. 10491 if (!V.getValueType().isFixedLengthVector()) { 10492 Ops.push_back(V); 10493 continue; 10494 } 10495 // "cast" fixed length vector to a scalable vector. 
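    // For example (illustrative; the exact container depends on VLEN): a
    // v4i32 operand ends up in the low elements of a scalable container such
    // as nxv2i32, inserted with an INSERT_SUBVECTOR at index 0.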
10496 MVT OpVT = V.getSimpleValueType(); 10497 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 10498 assert(useRVVForFixedLengthVectorVT(OpVT) && 10499 "Only fixed length vectors are supported!"); 10500 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 10501 } 10502 10503 if (!VT.isFixedLengthVector()) 10504 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); 10505 10506 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); 10507 10508 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 10509} 10510 10511SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, 10512 SelectionDAG &DAG) const { 10513 SDLoc DL(Op); 10514 MVT VT = Op.getSimpleValueType(); 10515 10516 SDValue Src = Op.getOperand(0); 10517 // NOTE: Mask is dropped. 10518 SDValue VL = Op.getOperand(2); 10519 10520 MVT ContainerVT = VT; 10521 if (VT.isFixedLengthVector()) { 10522 ContainerVT = getContainerForFixedLengthVector(VT); 10523 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 10524 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 10525 } 10526 10527 MVT XLenVT = Subtarget.getXLenVT(); 10528 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 10529 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10530 DAG.getUNDEF(ContainerVT), Zero, VL); 10531 10532 SDValue SplatValue = DAG.getConstant( 10533 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); 10534 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10535 DAG.getUNDEF(ContainerVT), SplatValue, VL); 10536 10537 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat, 10538 ZeroSplat, DAG.getUNDEF(ContainerVT), VL); 10539 if (!VT.isFixedLengthVector()) 10540 return Result; 10541 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10542} 10543 10544SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, 10545 SelectionDAG &DAG) const { 10546 SDLoc DL(Op); 10547 MVT VT = Op.getSimpleValueType(); 10548 10549 SDValue Op1 = Op.getOperand(0); 10550 SDValue Op2 = Op.getOperand(1); 10551 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 10552 // NOTE: Mask is dropped. 
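  // Assumed operand layout for VP_SETCC here: (lhs, rhs, cc, mask, evl), so
  // operand 3 is the dropped mask and operand 4 below is the explicit vector
  // length.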
10553 SDValue VL = Op.getOperand(4); 10554 10555 MVT ContainerVT = VT; 10556 if (VT.isFixedLengthVector()) { 10557 ContainerVT = getContainerForFixedLengthVector(VT); 10558 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10559 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10560 } 10561 10562 SDValue Result; 10563 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 10564 10565 switch (Condition) { 10566 default: 10567 break; 10568 // X != Y --> (X^Y) 10569 case ISD::SETNE: 10570 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 10571 break; 10572 // X == Y --> ~(X^Y) 10573 case ISD::SETEQ: { 10574 SDValue Temp = 10575 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 10576 Result = 10577 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); 10578 break; 10579 } 10580 // X >s Y --> X == 0 & Y == 1 --> ~X & Y 10581 // X <u Y --> X == 0 & Y == 1 --> ~X & Y 10582 case ISD::SETGT: 10583 case ISD::SETULT: { 10584 SDValue Temp = 10585 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 10586 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); 10587 break; 10588 } 10589 // X <s Y --> X == 1 & Y == 0 --> ~Y & X 10590 // X >u Y --> X == 1 & Y == 0 --> ~Y & X 10591 case ISD::SETLT: 10592 case ISD::SETUGT: { 10593 SDValue Temp = 10594 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 10595 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); 10596 break; 10597 } 10598 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 10599 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 10600 case ISD::SETGE: 10601 case ISD::SETULE: { 10602 SDValue Temp = 10603 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 10604 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); 10605 break; 10606 } 10607 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 10608 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 10609 case ISD::SETLE: 10610 case ISD::SETUGE: { 10611 SDValue Temp = 10612 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 10613 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); 10614 break; 10615 } 10616 } 10617 10618 if (!VT.isFixedLengthVector()) 10619 return Result; 10620 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10621} 10622 10623// Lower Floating-Point/Integer Type-Convert VP SDNodes 10624SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, 10625 SelectionDAG &DAG) const { 10626 SDLoc DL(Op); 10627 10628 SDValue Src = Op.getOperand(0); 10629 SDValue Mask = Op.getOperand(1); 10630 SDValue VL = Op.getOperand(2); 10631 unsigned RISCVISDOpc = getRISCVVLOp(Op); 10632 10633 MVT DstVT = Op.getSimpleValueType(); 10634 MVT SrcVT = Src.getSimpleValueType(); 10635 if (DstVT.isFixedLengthVector()) { 10636 DstVT = getContainerForFixedLengthVector(DstVT); 10637 SrcVT = getContainerForFixedLengthVector(SrcVT); 10638 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 10639 MVT MaskVT = getMaskTypeFor(DstVT); 10640 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10641 } 10642 10643 unsigned DstEltSize = DstVT.getScalarSizeInBits(); 10644 unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); 10645 10646 SDValue Result; 10647 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. 
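    // Illustrative examples of the cases handled below (not exhaustive):
    //   i32 -> f64 : destination exactly twice as wide, one widening convert
    //   i8  -> f64 : pre-widen i8 to i32 (half of 64) with VSEXT/VZEXT, then
    //                one widening convert
    //   i1  -> fNN : first expand the mask to 0 / +-1 integer elements
    //   f16 -> i64 : FP_EXTEND to f32 first, then convert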
10648 if (SrcVT.isInteger()) { 10649 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 10650 10651 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL 10652 ? RISCVISD::VSEXT_VL 10653 : RISCVISD::VZEXT_VL; 10654 10655 // Do we need to do any pre-widening before converting? 10656 if (SrcEltSize == 1) { 10657 MVT IntVT = DstVT.changeVectorElementTypeToInteger(); 10658 MVT XLenVT = Subtarget.getXLenVT(); 10659 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 10660 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 10661 DAG.getUNDEF(IntVT), Zero, VL); 10662 SDValue One = DAG.getConstant( 10663 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); 10664 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 10665 DAG.getUNDEF(IntVT), One, VL); 10666 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat, 10667 ZeroSplat, DAG.getUNDEF(IntVT), VL); 10668 } else if (DstEltSize > (2 * SrcEltSize)) { 10669 // Widen before converting. 10670 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), 10671 DstVT.getVectorElementCount()); 10672 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); 10673 } 10674 10675 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 10676 } else { 10677 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 10678 "Wrong input/output vector types"); 10679 10680 // Convert f16 to f32 then convert f32 to i64. 10681 if (DstEltSize > (2 * SrcEltSize)) { 10682 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 10683 MVT InterimFVT = 10684 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 10685 Src = 10686 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); 10687 } 10688 10689 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 10690 } 10691 } else { // Narrowing + Conversion 10692 if (SrcVT.isInteger()) { 10693 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 10694 // First do a narrowing convert to an FP type half the size, then round 10695 // the FP type to a small FP type if needed. 10696 10697 MVT InterimFVT = DstVT; 10698 if (SrcEltSize > (2 * DstEltSize)) { 10699 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); 10700 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 10701 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 10702 } 10703 10704 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); 10705 10706 if (InterimFVT != DstVT) { 10707 Src = Result; 10708 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); 10709 } 10710 } else { 10711 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 10712 "Wrong input/output vector types"); 10713 // First do a narrowing conversion to an integer half the size, then 10714 // truncate if needed. 10715 10716 if (DstEltSize == 1) { 10717 // First convert to the same size integer, then convert to mask using 10718 // setcc. 10719 assert(SrcEltSize >= 16 && "Unexpected FP type!"); 10720 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), 10721 DstVT.getVectorElementCount()); 10722 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 10723 10724 // Compare the integer result to 0. The integer should be 0 or 1/-1, 10725 // otherwise the conversion was undefined. 
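      // For example (illustrative): a VP_FPTOSI from f32 elements to an i1
      // mask first converts to i32 elements; the SETCC_VL against the zero
      // splat below then turns every non-zero lane into a set mask bit.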
10726 MVT XLenVT = Subtarget.getXLenVT(); 10727 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 10728 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, 10729 DAG.getUNDEF(InterimIVT), SplatZero, VL); 10730 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, 10731 {Result, SplatZero, DAG.getCondCode(ISD::SETNE), 10732 DAG.getUNDEF(DstVT), Mask, VL}); 10733 } else { 10734 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 10735 DstVT.getVectorElementCount()); 10736 10737 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 10738 10739 while (InterimIVT != DstVT) { 10740 SrcEltSize /= 2; 10741 Src = Result; 10742 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 10743 DstVT.getVectorElementCount()); 10744 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, 10745 Src, Mask, VL); 10746 } 10747 } 10748 } 10749 } 10750 10751 MVT VT = Op.getSimpleValueType(); 10752 if (!VT.isFixedLengthVector()) 10753 return Result; 10754 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10755} 10756 10757SDValue 10758RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, 10759 SelectionDAG &DAG) const { 10760 SDLoc DL(Op); 10761 10762 SDValue Op1 = Op.getOperand(0); 10763 SDValue Op2 = Op.getOperand(1); 10764 SDValue Offset = Op.getOperand(2); 10765 SDValue Mask = Op.getOperand(3); 10766 SDValue EVL1 = Op.getOperand(4); 10767 SDValue EVL2 = Op.getOperand(5); 10768 10769 const MVT XLenVT = Subtarget.getXLenVT(); 10770 MVT VT = Op.getSimpleValueType(); 10771 MVT ContainerVT = VT; 10772 if (VT.isFixedLengthVector()) { 10773 ContainerVT = getContainerForFixedLengthVector(VT); 10774 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10775 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10776 MVT MaskVT = getMaskTypeFor(ContainerVT); 10777 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10778 } 10779 10780 bool IsMaskVector = VT.getVectorElementType() == MVT::i1; 10781 if (IsMaskVector) { 10782 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); 10783 10784 // Expand input operands 10785 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10786 DAG.getUNDEF(ContainerVT), 10787 DAG.getConstant(1, DL, XLenVT), EVL1); 10788 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10789 DAG.getUNDEF(ContainerVT), 10790 DAG.getConstant(0, DL, XLenVT), EVL1); 10791 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1, 10792 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1); 10793 10794 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10795 DAG.getUNDEF(ContainerVT), 10796 DAG.getConstant(1, DL, XLenVT), EVL2); 10797 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 10798 DAG.getUNDEF(ContainerVT), 10799 DAG.getConstant(0, DL, XLenVT), EVL2); 10800 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2, 10801 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); 10802 } 10803 10804 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); 10805 SDValue DownOffset, UpOffset; 10806 if (ImmValue >= 0) { 10807 // The operand is a TargetConstant, we need to rebuild it as a regular 10808 // constant. 
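    // Illustrative example (not from the original comment): for
    // vp.splice(Op1, Op2, 2, EVL1, EVL2), the slides built below move Op1
    // down by 2 and then slide Op2 up by EVL1 - 2, so the result is the tail
    // of Op1 (from element 2) followed by the leading elements of Op2.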
10809 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 10810 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset); 10811 } else { 10812 // The operand is a TargetConstant, we need to rebuild it as a regular 10813 // constant rather than negating the original operand. 10814 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 10815 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset); 10816 } 10817 10818 SDValue SlideDown = 10819 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 10820 Op1, DownOffset, Mask, UpOffset); 10821 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2, 10822 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC); 10823 10824 if (IsMaskVector) { 10825 // Truncate Result back to a mask vector (Result has same EVL as Op2) 10826 Result = DAG.getNode( 10827 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), 10828 {Result, DAG.getConstant(0, DL, ContainerVT), 10829 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), 10830 Mask, EVL2}); 10831 } 10832 10833 if (!VT.isFixedLengthVector()) 10834 return Result; 10835 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10836} 10837 10838SDValue 10839RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, 10840 SelectionDAG &DAG) const { 10841 SDLoc DL(Op); 10842 MVT VT = Op.getSimpleValueType(); 10843 MVT XLenVT = Subtarget.getXLenVT(); 10844 10845 SDValue Op1 = Op.getOperand(0); 10846 SDValue Mask = Op.getOperand(1); 10847 SDValue EVL = Op.getOperand(2); 10848 10849 MVT ContainerVT = VT; 10850 if (VT.isFixedLengthVector()) { 10851 ContainerVT = getContainerForFixedLengthVector(VT); 10852 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10853 MVT MaskVT = getMaskTypeFor(ContainerVT); 10854 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 10855 } 10856 10857 MVT GatherVT = ContainerVT; 10858 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); 10859 // Check if we are working with mask vectors 10860 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; 10861 if (IsMaskVector) { 10862 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); 10863 10864 // Expand input operand 10865 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10866 DAG.getUNDEF(IndicesVT), 10867 DAG.getConstant(1, DL, XLenVT), EVL); 10868 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10869 DAG.getUNDEF(IndicesVT), 10870 DAG.getConstant(0, DL, XLenVT), EVL); 10871 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne, 10872 SplatZero, DAG.getUNDEF(IndicesVT), EVL); 10873 } 10874 10875 unsigned EltSize = GatherVT.getScalarSizeInBits(); 10876 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); 10877 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 10878 unsigned MaxVLMAX = 10879 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 10880 10881 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 10882 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 10883 // to use vrgatherei16.vv. 10884 // TODO: It's also possible to use vrgatherei16.vv for other types to 10885 // decrease register width for the index calculation. 10886 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 10887 if (MaxVLMAX > 256 && EltSize == 8) { 10888 // If this is LMUL=8, we have to split before using vrgatherei16.vv. 10889 // Split the vector in half and reverse each half using a full register 10890 // reverse. 
10891 // Swap the halves and concatenate them. 10892 // Slide the concatenated result by (VLMax - VL). 10893 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 10894 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); 10895 auto [Lo, Hi] = DAG.SplitVector(Op1, DL); 10896 10897 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 10898 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 10899 10900 // Reassemble the low and high pieces reversed. 10901 // NOTE: this Result is unmasked (because we do not need masks for 10902 // shuffles). If in the future this has to change, we can use a SELECT_VL 10903 // between Result and UNDEF using the mask originally passed to VP_REVERSE 10904 SDValue Result = 10905 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); 10906 10907 // Slide off any elements from past EVL that were reversed into the low 10908 // elements. 10909 unsigned MinElts = GatherVT.getVectorMinNumElements(); 10910 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 10911 DAG.getConstant(MinElts, DL, XLenVT)); 10912 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); 10913 10914 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, 10915 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); 10916 10917 if (IsMaskVector) { 10918 // Truncate Result back to a mask vector 10919 Result = 10920 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, 10921 {Result, DAG.getConstant(0, DL, GatherVT), 10922 DAG.getCondCode(ISD::SETNE), 10923 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 10924 } 10925 10926 if (!VT.isFixedLengthVector()) 10927 return Result; 10928 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10929 } 10930 10931 // Just promote the int type to i16 which will double the LMUL. 10932 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); 10933 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 10934 } 10935 10936 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); 10937 SDValue VecLen = 10938 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); 10939 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, 10940 DAG.getUNDEF(IndicesVT), VecLen, EVL); 10941 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, 10942 DAG.getUNDEF(IndicesVT), Mask, EVL); 10943 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, 10944 DAG.getUNDEF(GatherVT), Mask, EVL); 10945 10946 if (IsMaskVector) { 10947 // Truncate Result back to a mask vector 10948 Result = DAG.getNode( 10949 RISCVISD::SETCC_VL, DL, ContainerVT, 10950 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), 10951 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); 10952 } 10953 10954 if (!VT.isFixedLengthVector()) 10955 return Result; 10956 return convertFromScalableVector(VT, Result, DAG, Subtarget); 10957} 10958 10959SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, 10960 SelectionDAG &DAG) const { 10961 MVT VT = Op.getSimpleValueType(); 10962 if (VT.getVectorElementType() != MVT::i1) 10963 return lowerVPOp(Op, DAG); 10964 10965 // It is safe to drop mask parameter as masked-off elements are undef. 
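  // Assumed operand layout for these mask-typed VP logic ops (e.g. vp.and on
  // <N x i1>): (lhs, rhs, mask, evl), so operand 3 read below is the explicit
  // vector length.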
10966 SDValue Op1 = Op->getOperand(0); 10967 SDValue Op2 = Op->getOperand(1); 10968 SDValue VL = Op->getOperand(3); 10969 10970 MVT ContainerVT = VT; 10971 const bool IsFixed = VT.isFixedLengthVector(); 10972 if (IsFixed) { 10973 ContainerVT = getContainerForFixedLengthVector(VT); 10974 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 10975 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 10976 } 10977 10978 SDLoc DL(Op); 10979 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL); 10980 if (!IsFixed) 10981 return Val; 10982 return convertFromScalableVector(VT, Val, DAG, Subtarget); 10983} 10984 10985SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, 10986 SelectionDAG &DAG) const { 10987 SDLoc DL(Op); 10988 MVT XLenVT = Subtarget.getXLenVT(); 10989 MVT VT = Op.getSimpleValueType(); 10990 MVT ContainerVT = VT; 10991 if (VT.isFixedLengthVector()) 10992 ContainerVT = getContainerForFixedLengthVector(VT); 10993 10994 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 10995 10996 auto *VPNode = cast<VPStridedLoadSDNode>(Op); 10997 // Check if the mask is known to be all ones 10998 SDValue Mask = VPNode->getMask(); 10999 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11000 11001 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse 11002 : Intrinsic::riscv_vlse_mask, 11003 DL, XLenVT); 11004 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, 11005 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), 11006 VPNode->getStride()}; 11007 if (!IsUnmasked) { 11008 if (VT.isFixedLengthVector()) { 11009 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11010 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11011 } 11012 Ops.push_back(Mask); 11013 } 11014 Ops.push_back(VPNode->getVectorLength()); 11015 if (!IsUnmasked) { 11016 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 11017 Ops.push_back(Policy); 11018 } 11019 11020 SDValue Result = 11021 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 11022 VPNode->getMemoryVT(), VPNode->getMemOperand()); 11023 SDValue Chain = Result.getValue(1); 11024 11025 if (VT.isFixedLengthVector()) 11026 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11027 11028 return DAG.getMergeValues({Result, Chain}, DL); 11029} 11030 11031SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, 11032 SelectionDAG &DAG) const { 11033 SDLoc DL(Op); 11034 MVT XLenVT = Subtarget.getXLenVT(); 11035 11036 auto *VPNode = cast<VPStridedStoreSDNode>(Op); 11037 SDValue StoreVal = VPNode->getValue(); 11038 MVT VT = StoreVal.getSimpleValueType(); 11039 MVT ContainerVT = VT; 11040 if (VT.isFixedLengthVector()) { 11041 ContainerVT = getContainerForFixedLengthVector(VT); 11042 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 11043 } 11044 11045 // Check if the mask is known to be all ones 11046 SDValue Mask = VPNode->getMask(); 11047 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11048 11049 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
Intrinsic::riscv_vsse 11050 : Intrinsic::riscv_vsse_mask, 11051 DL, XLenVT); 11052 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, 11053 VPNode->getBasePtr(), VPNode->getStride()}; 11054 if (!IsUnmasked) { 11055 if (VT.isFixedLengthVector()) { 11056 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 11057 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11058 } 11059 Ops.push_back(Mask); 11060 } 11061 Ops.push_back(VPNode->getVectorLength()); 11062 11063 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), 11064 Ops, VPNode->getMemoryVT(), 11065 VPNode->getMemOperand()); 11066} 11067 11068// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 11069// matched to a RVV indexed load. The RVV indexed load instructions only 11070// support the "unsigned unscaled" addressing mode; indices are implicitly 11071// zero-extended or truncated to XLEN and are treated as byte offsets. Any 11072// signed or scaled indexing is extended to the XLEN value type and scaled 11073// accordingly. 11074SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 11075 SelectionDAG &DAG) const { 11076 SDLoc DL(Op); 11077 MVT VT = Op.getSimpleValueType(); 11078 11079 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11080 EVT MemVT = MemSD->getMemoryVT(); 11081 MachineMemOperand *MMO = MemSD->getMemOperand(); 11082 SDValue Chain = MemSD->getChain(); 11083 SDValue BasePtr = MemSD->getBasePtr(); 11084 11085 ISD::LoadExtType LoadExtType; 11086 SDValue Index, Mask, PassThru, VL; 11087 11088 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 11089 Index = VPGN->getIndex(); 11090 Mask = VPGN->getMask(); 11091 PassThru = DAG.getUNDEF(VT); 11092 VL = VPGN->getVectorLength(); 11093 // VP doesn't support extending loads. 11094 LoadExtType = ISD::NON_EXTLOAD; 11095 } else { 11096 // Else it must be a MGATHER. 11097 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 11098 Index = MGN->getIndex(); 11099 Mask = MGN->getMask(); 11100 PassThru = MGN->getPassThru(); 11101 LoadExtType = MGN->getExtensionType(); 11102 } 11103 11104 MVT IndexVT = Index.getSimpleValueType(); 11105 MVT XLenVT = Subtarget.getXLenVT(); 11106 11107 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 11108 "Unexpected VTs!"); 11109 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 11110 // Targets have to explicitly opt-in for extending vector loads. 11111 assert(LoadExtType == ISD::NON_EXTLOAD && 11112 "Unexpected extending MGATHER/VP_GATHER"); 11113 (void)LoadExtType; 11114 11115 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 11116 // the selection of the masked intrinsics doesn't do this for us. 
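  // For example (illustrative): a gather whose mask is a splat of all-ones is
  // emitted as the unmasked riscv_vluxei intrinsic with an undef passthru, and
  // the mask and policy operands below are simply omitted.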
11117 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11118 11119 MVT ContainerVT = VT; 11120 if (VT.isFixedLengthVector()) { 11121 ContainerVT = getContainerForFixedLengthVector(VT); 11122 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 11123 ContainerVT.getVectorElementCount()); 11124 11125 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 11126 11127 if (!IsUnmasked) { 11128 MVT MaskVT = getMaskTypeFor(ContainerVT); 11129 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11130 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 11131 } 11132 } 11133 11134 if (!VL) 11135 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11136 11137 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 11138 IndexVT = IndexVT.changeVectorElementType(XLenVT); 11139 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 11140 } 11141 11142 unsigned IntID = 11143 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 11144 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 11145 if (IsUnmasked) 11146 Ops.push_back(DAG.getUNDEF(ContainerVT)); 11147 else 11148 Ops.push_back(PassThru); 11149 Ops.push_back(BasePtr); 11150 Ops.push_back(Index); 11151 if (!IsUnmasked) 11152 Ops.push_back(Mask); 11153 Ops.push_back(VL); 11154 if (!IsUnmasked) 11155 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 11156 11157 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 11158 SDValue Result = 11159 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 11160 Chain = Result.getValue(1); 11161 11162 if (VT.isFixedLengthVector()) 11163 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 11164 11165 return DAG.getMergeValues({Result, Chain}, DL); 11166} 11167 11168// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 11169// matched to a RVV indexed store. The RVV indexed store instructions only 11170// support the "unsigned unscaled" addressing mode; indices are implicitly 11171// zero-extended or truncated to XLEN and are treated as byte offsets. Any 11172// signed or scaled indexing is extended to the XLEN value type and scaled 11173// accordingly. 11174SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 11175 SelectionDAG &DAG) const { 11176 SDLoc DL(Op); 11177 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 11178 EVT MemVT = MemSD->getMemoryVT(); 11179 MachineMemOperand *MMO = MemSD->getMemOperand(); 11180 SDValue Chain = MemSD->getChain(); 11181 SDValue BasePtr = MemSD->getBasePtr(); 11182 11183 bool IsTruncatingStore = false; 11184 SDValue Index, Mask, Val, VL; 11185 11186 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 11187 Index = VPSN->getIndex(); 11188 Mask = VPSN->getMask(); 11189 Val = VPSN->getValue(); 11190 VL = VPSN->getVectorLength(); 11191 // VP doesn't support truncating stores. 11192 IsTruncatingStore = false; 11193 } else { 11194 // Else it must be a MSCATTER. 
11195 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 11196 Index = MSN->getIndex(); 11197 Mask = MSN->getMask(); 11198 Val = MSN->getValue(); 11199 IsTruncatingStore = MSN->isTruncatingStore(); 11200 } 11201 11202 MVT VT = Val.getSimpleValueType(); 11203 MVT IndexVT = Index.getSimpleValueType(); 11204 MVT XLenVT = Subtarget.getXLenVT(); 11205 11206 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 11207 "Unexpected VTs!"); 11208 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 11209 // Targets have to explicitly opt-in for extending vector loads and 11210 // truncating vector stores. 11211 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 11212 (void)IsTruncatingStore; 11213 11214 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 11215 // the selection of the masked intrinsics doesn't do this for us. 11216 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 11217 11218 MVT ContainerVT = VT; 11219 if (VT.isFixedLengthVector()) { 11220 ContainerVT = getContainerForFixedLengthVector(VT); 11221 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 11222 ContainerVT.getVectorElementCount()); 11223 11224 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 11225 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 11226 11227 if (!IsUnmasked) { 11228 MVT MaskVT = getMaskTypeFor(ContainerVT); 11229 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 11230 } 11231 } 11232 11233 if (!VL) 11234 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 11235 11236 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 11237 IndexVT = IndexVT.changeVectorElementType(XLenVT); 11238 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); 11239 } 11240 11241 unsigned IntID = 11242 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 11243 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 11244 Ops.push_back(Val); 11245 Ops.push_back(BasePtr); 11246 Ops.push_back(Index); 11247 if (!IsUnmasked) 11248 Ops.push_back(Mask); 11249 Ops.push_back(VL); 11250 11251 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 11252 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 11253} 11254 11255SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 11256 SelectionDAG &DAG) const { 11257 const MVT XLenVT = Subtarget.getXLenVT(); 11258 SDLoc DL(Op); 11259 SDValue Chain = Op->getOperand(0); 11260 SDValue SysRegNo = DAG.getTargetConstant( 11261 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 11262 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 11263 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 11264 11265 // Encoding used for rounding mode in RISC-V differs from that used in 11266 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 11267 // table, which consists of a sequence of 4-bit fields, each representing 11268 // corresponding FLT_ROUNDS mode. 
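  // Worked example (illustrative, assuming the usual encodings
  // RISCVFPRndMode::RTZ == 1 and RoundingMode::TowardZero == 0): with FRM
  // holding RTZ, the code below computes Shift = 1 << 2 = 4, and
  // (Table >> 4) & 7 extracts the 4-bit field holding TowardZero, i.e. 0.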
11269 static const int Table = 11270 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 11271 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 11272 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 11273 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 11274 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 11275 11276 SDValue Shift = 11277 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 11278 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 11279 DAG.getConstant(Table, DL, XLenVT), Shift); 11280 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 11281 DAG.getConstant(7, DL, XLenVT)); 11282 11283 return DAG.getMergeValues({Masked, Chain}, DL); 11284} 11285 11286SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 11287 SelectionDAG &DAG) const { 11288 const MVT XLenVT = Subtarget.getXLenVT(); 11289 SDLoc DL(Op); 11290 SDValue Chain = Op->getOperand(0); 11291 SDValue RMValue = Op->getOperand(1); 11292 SDValue SysRegNo = DAG.getTargetConstant( 11293 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 11294 11295 // Encoding used for rounding mode in RISC-V differs from that used in 11296 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in 11297 // a table, which consists of a sequence of 4-bit fields, each representing 11298 // corresponding RISC-V mode. 11299 static const unsigned Table = 11300 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | 11301 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | 11302 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | 11303 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | 11304 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); 11305 11306 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue); 11307 11308 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, 11309 DAG.getConstant(2, DL, XLenVT)); 11310 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 11311 DAG.getConstant(Table, DL, XLenVT), Shift); 11312 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 11313 DAG.getConstant(0x7, DL, XLenVT)); 11314 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, 11315 RMValue); 11316} 11317 11318SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 11319 SelectionDAG &DAG) const { 11320 MachineFunction &MF = DAG.getMachineFunction(); 11321 11322 bool isRISCV64 = Subtarget.is64Bit(); 11323 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 11324 11325 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false); 11326 return DAG.getFrameIndex(FI, PtrVT); 11327} 11328 11329// Returns the opcode of the target-specific SDNode that implements the 32-bit 11330// form of the given Opcode. 11331static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 11332 switch (Opcode) { 11333 default: 11334 llvm_unreachable("Unexpected opcode"); 11335 case ISD::SHL: 11336 return RISCVISD::SLLW; 11337 case ISD::SRA: 11338 return RISCVISD::SRAW; 11339 case ISD::SRL: 11340 return RISCVISD::SRLW; 11341 case ISD::SDIV: 11342 return RISCVISD::DIVW; 11343 case ISD::UDIV: 11344 return RISCVISD::DIVUW; 11345 case ISD::UREM: 11346 return RISCVISD::REMUW; 11347 case ISD::ROTL: 11348 return RISCVISD::ROLW; 11349 case ISD::ROTR: 11350 return RISCVISD::RORW; 11351 } 11352} 11353 11354// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 11355// node. 
Because i8/i16/i32 isn't a legal type for RV64, these operations would
11356// otherwise be promoted to i64, making it difficult to select the
11357// SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
11358// was originally of type i8/i16/i32 is lost.
11359static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11360 unsigned ExtOpc = ISD::ANY_EXTEND) {
11361 SDLoc DL(N);
11362 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11363 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11364 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11365 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11366 // ReplaceNodeResults requires that we maintain the same type for the return value.
11367 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11368}
11369
11370// Converts the given 32-bit operation to an i64 operation with sign-extension
11371// semantics, reducing the number of sign-extension instructions.
11372static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11373 SDLoc DL(N);
11374 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11375 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11376 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11377 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11378 DAG.getValueType(MVT::i32));
11379 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11380}
11381
11382void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11383 SmallVectorImpl<SDValue> &Results,
11384 SelectionDAG &DAG) const {
11385 SDLoc DL(N);
11386 switch (N->getOpcode()) {
11387 default:
11388 llvm_unreachable("Don't know how to custom type legalize this operation!");
11389 case ISD::STRICT_FP_TO_SINT:
11390 case ISD::STRICT_FP_TO_UINT:
11391 case ISD::FP_TO_SINT:
11392 case ISD::FP_TO_UINT: {
11393 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11394 "Unexpected custom legalisation");
11395 bool IsStrict = N->isStrictFPOpcode();
11396 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11397 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11398 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11399 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11400 TargetLowering::TypeSoftenFloat) {
11401 if (!isTypeLegal(Op0.getValueType()))
11402 return;
11403 if (IsStrict) {
11404 SDValue Chain = N->getOperand(0);
11405 // In the absence of Zfh, promote f16 to f32, then convert.
11406 if (Op0.getValueType() == MVT::f16 &&
11407 !Subtarget.hasStdExtZfhOrZhinx()) {
11408 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11409 {Chain, Op0});
11410 Chain = Op0.getValue(1);
11411 }
11412 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11413 : RISCVISD::STRICT_FCVT_WU_RV64;
11414 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11415 SDValue Res = DAG.getNode(
11416 Opc, DL, VTs, Chain, Op0,
11417 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11418 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11419 Results.push_back(Res.getValue(1));
11420 return;
11421 }
11422 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
11423 // convert.
11424 if ((Op0.getValueType() == MVT::f16 &&
11425 !Subtarget.hasStdExtZfhOrZhinx()) ||
11426 Op0.getValueType() == MVT::bf16)
11427 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11428
11429 unsigned Opc = IsSigned ?
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 11430 SDValue Res = 11431 DAG.getNode(Opc, DL, MVT::i64, Op0, 11432 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 11433 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11434 return; 11435 } 11436 // If the FP type needs to be softened, emit a library call using the 'si' 11437 // version. If we left it to default legalization we'd end up with 'di'. If 11438 // the FP type doesn't need to be softened just let generic type 11439 // legalization promote the result type. 11440 RTLIB::Libcall LC; 11441 if (IsSigned) 11442 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 11443 else 11444 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 11445 MakeLibCallOptions CallOptions; 11446 EVT OpVT = Op0.getValueType(); 11447 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 11448 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 11449 SDValue Result; 11450 std::tie(Result, Chain) = 11451 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 11452 Results.push_back(Result); 11453 if (IsStrict) 11454 Results.push_back(Chain); 11455 break; 11456 } 11457 case ISD::LROUND: { 11458 SDValue Op0 = N->getOperand(0); 11459 EVT Op0VT = Op0.getValueType(); 11460 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 11461 TargetLowering::TypeSoftenFloat) { 11462 if (!isTypeLegal(Op0VT)) 11463 return; 11464 11465 // In absense of Zfh, promote f16 to f32, then convert. 11466 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) 11467 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 11468 11469 SDValue Res = 11470 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, 11471 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); 11472 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11473 return; 11474 } 11475 // If the FP type needs to be softened, emit a library call to lround. We'll 11476 // need to truncate the result. We assume any value that doesn't fit in i32 11477 // is allowed to return an unspecified value. 11478 RTLIB::Libcall LC = 11479 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; 11480 MakeLibCallOptions CallOptions; 11481 EVT OpVT = Op0.getValueType(); 11482 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); 11483 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; 11484 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); 11485 Results.push_back(Result); 11486 break; 11487 } 11488 case ISD::READCYCLECOUNTER: { 11489 assert(!Subtarget.is64Bit() && 11490 "READCYCLECOUNTER only has custom type legalization on riscv32"); 11491 11492 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 11493 SDValue RCW = 11494 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 11495 11496 Results.push_back( 11497 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 11498 Results.push_back(RCW.getValue(2)); 11499 break; 11500 } 11501 case ISD::LOAD: { 11502 if (!ISD::isNON_EXTLoad(N)) 11503 return; 11504 11505 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 11506 // sext_inreg we emit for ADD/SUB/MUL/SLLI. 
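    // For example (illustrative): an illegal-typed i32 load reaching this
    // point on RV64 becomes a sign-extending i64 load (LW sign-extends on
    // RV64 anyway) followed by a TRUNCATE back to i32.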
11507 LoadSDNode *Ld = cast<LoadSDNode>(N); 11508 11509 SDLoc dl(N); 11510 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), 11511 Ld->getBasePtr(), Ld->getMemoryVT(), 11512 Ld->getMemOperand()); 11513 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); 11514 Results.push_back(Res.getValue(1)); 11515 return; 11516 } 11517 case ISD::MUL: { 11518 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 11519 unsigned XLen = Subtarget.getXLen(); 11520 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 11521 if (Size > XLen) { 11522 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 11523 SDValue LHS = N->getOperand(0); 11524 SDValue RHS = N->getOperand(1); 11525 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 11526 11527 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 11528 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 11529 // We need exactly one side to be unsigned. 11530 if (LHSIsU == RHSIsU) 11531 return; 11532 11533 auto MakeMULPair = [&](SDValue S, SDValue U) { 11534 MVT XLenVT = Subtarget.getXLenVT(); 11535 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 11536 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 11537 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 11538 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 11539 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 11540 }; 11541 11542 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 11543 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 11544 11545 // The other operand should be signed, but still prefer MULH when 11546 // possible. 11547 if (RHSIsU && LHSIsS && !RHSIsS) 11548 Results.push_back(MakeMULPair(LHS, RHS)); 11549 else if (LHSIsU && RHSIsS && !LHSIsS) 11550 Results.push_back(MakeMULPair(RHS, LHS)); 11551 11552 return; 11553 } 11554 [[fallthrough]]; 11555 } 11556 case ISD::ADD: 11557 case ISD::SUB: 11558 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11559 "Unexpected custom legalisation"); 11560 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 11561 break; 11562 case ISD::SHL: 11563 case ISD::SRA: 11564 case ISD::SRL: 11565 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11566 "Unexpected custom legalisation"); 11567 if (N->getOperand(1).getOpcode() != ISD::Constant) { 11568 // If we can use a BSET instruction, allow default promotion to apply. 11569 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && 11570 isOneConstant(N->getOperand(0))) 11571 break; 11572 Results.push_back(customLegalizeToWOp(N, DAG)); 11573 break; 11574 } 11575 11576 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 11577 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 11578 // shift amount. 
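    // Illustrative reasoning (not from the original comment): the plain i64
    // SHL built below uses all 64 bits of the shift amount, so garbage in the
    // upper bits of an any-extended amount could change the result;
    // zero-extending preserves the i32 amount exactly (well-defined amounts
    // are < 32).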
11579 if (N->getOpcode() == ISD::SHL) { 11580 SDLoc DL(N); 11581 SDValue NewOp0 = 11582 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11583 SDValue NewOp1 = 11584 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 11585 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 11586 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 11587 DAG.getValueType(MVT::i32)); 11588 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 11589 } 11590 11591 break; 11592 case ISD::ROTL: 11593 case ISD::ROTR: 11594 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11595 "Unexpected custom legalisation"); 11596 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || 11597 Subtarget.hasVendorXTHeadBb()) && 11598 "Unexpected custom legalization"); 11599 if (!isa<ConstantSDNode>(N->getOperand(1)) && 11600 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) 11601 return; 11602 Results.push_back(customLegalizeToWOp(N, DAG)); 11603 break; 11604 case ISD::CTTZ: 11605 case ISD::CTTZ_ZERO_UNDEF: 11606 case ISD::CTLZ: 11607 case ISD::CTLZ_ZERO_UNDEF: { 11608 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11609 "Unexpected custom legalisation"); 11610 11611 SDValue NewOp0 = 11612 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11613 bool IsCTZ = 11614 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 11615 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 11616 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 11617 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11618 return; 11619 } 11620 case ISD::SDIV: 11621 case ISD::UDIV: 11622 case ISD::UREM: { 11623 MVT VT = N->getSimpleValueType(0); 11624 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 11625 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 11626 "Unexpected custom legalisation"); 11627 // Don't promote division/remainder by constant since we should expand those 11628 // to multiply by magic constant. 11629 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 11630 if (N->getOperand(1).getOpcode() == ISD::Constant && 11631 !isIntDivCheap(N->getValueType(0), Attr)) 11632 return; 11633 11634 // If the input is i32, use ANY_EXTEND since the W instructions don't read 11635 // the upper 32 bits. For other types we need to sign or zero extend 11636 // based on the opcode. 11637 unsigned ExtOpc = ISD::ANY_EXTEND; 11638 if (VT != MVT::i32) 11639 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 11640 : ISD::ZERO_EXTEND; 11641 11642 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 11643 break; 11644 } 11645 case ISD::SADDO: { 11646 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11647 "Unexpected custom legalisation"); 11648 11649 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 11650 // use the default legalization. 
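    // For example (illustrative): with a constant RHS, the (RHS < 0) setcc
    // built below folds to a constant, so the overflow check reduces to the
    // single Res < LHS setcc plus at most an XOR.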
11651 if (!isa<ConstantSDNode>(N->getOperand(1))) 11652 return; 11653 11654 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 11655 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 11656 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); 11657 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 11658 DAG.getValueType(MVT::i32)); 11659 11660 SDValue Zero = DAG.getConstant(0, DL, MVT::i64); 11661 11662 // For an addition, the result should be less than one of the operands (LHS) 11663 // if and only if the other operand (RHS) is negative, otherwise there will 11664 // be overflow. 11665 // For a subtraction, the result should be less than one of the operands 11666 // (LHS) if and only if the other operand (RHS) is (non-zero) positive, 11667 // otherwise there will be overflow. 11668 EVT OType = N->getValueType(1); 11669 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT); 11670 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT); 11671 11672 SDValue Overflow = 11673 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS); 11674 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11675 Results.push_back(Overflow); 11676 return; 11677 } 11678 case ISD::UADDO: 11679 case ISD::USUBO: { 11680 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11681 "Unexpected custom legalisation"); 11682 bool IsAdd = N->getOpcode() == ISD::UADDO; 11683 // Create an ADDW or SUBW. 11684 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11685 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11686 SDValue Res = 11687 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 11688 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 11689 DAG.getValueType(MVT::i32)); 11690 11691 SDValue Overflow; 11692 if (IsAdd && isOneConstant(RHS)) { 11693 // Special case uaddo X, 1 overflowed if the addition result is 0. 11694 // The general case (X + C) < C is not necessarily beneficial. Although we 11695 // reduce the live range of X, we may introduce the materialization of 11696 // constant C, especially when the setcc result is used by branch. We have 11697 // no compare with constant and branch instructions. 11698 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, 11699 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); 11700 } else if (IsAdd && isAllOnesConstant(RHS)) { 11701 // Special case uaddo X, -1 overflowed if X != 0. 11702 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), 11703 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); 11704 } else { 11705 // Sign extend the LHS and perform an unsigned compare with the ADDW 11706 // result. Since the inputs are sign extended from i32, this is equivalent 11707 // to comparing the lower 32 bits. 11708 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 11709 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 11710 IsAdd ? ISD::SETULT : ISD::SETUGT); 11711 } 11712 11713 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11714 Results.push_back(Overflow); 11715 return; 11716 } 11717 case ISD::UADDSAT: 11718 case ISD::USUBSAT: { 11719 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11720 "Unexpected custom legalisation"); 11721 if (Subtarget.hasStdExtZbb()) { 11722 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. 
Using 11723 // sign extend allows overflow of the lower 32 bits to be detected on 11724 // the promoted size. 11725 SDValue LHS = 11726 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 11727 SDValue RHS = 11728 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 11729 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 11730 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11731 return; 11732 } 11733 11734 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 11735 // promotion for UADDO/USUBO. 11736 Results.push_back(expandAddSubSat(N, DAG)); 11737 return; 11738 } 11739 case ISD::ABS: { 11740 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 11741 "Unexpected custom legalisation"); 11742 11743 if (Subtarget.hasStdExtZbb()) { 11744 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. 11745 // This allows us to remember that the result is sign extended. Expanding 11746 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. 11747 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, 11748 N->getOperand(0)); 11749 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); 11750 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); 11751 return; 11752 } 11753 11754 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) 11755 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 11756 11757 // Freeze the source so we can increase its use count. 11758 Src = DAG.getFreeze(Src); 11759 11760 // Copy sign bit to all bits using the sraiw pattern. 11761 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, 11762 DAG.getValueType(MVT::i32)); 11763 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, 11764 DAG.getConstant(31, DL, MVT::i64)); 11765 11766 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); 11767 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); 11768 11769 // NOTE: The result is only required to be anyextended, but sext is 11770 // consistent with type legalization of sub.
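// Illustrative check of the identity used above: for X = -5, Y = X >> 31 is
// all ones, and (X ^ Y) - Y = 4 - (-1) = 5; for X = 7, Y = 0 and the value is
// unchanged.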
11771 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, 11772 DAG.getValueType(MVT::i32)); 11773 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 11774 return; 11775 } 11776 case ISD::BITCAST: { 11777 EVT VT = N->getValueType(0); 11778 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 11779 SDValue Op0 = N->getOperand(0); 11780 EVT Op0VT = Op0.getValueType(); 11781 MVT XLenVT = Subtarget.getXLenVT(); 11782 if (VT == MVT::i16 && Op0VT == MVT::f16 && 11783 Subtarget.hasStdExtZfhminOrZhinxmin()) { 11784 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 11785 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 11786 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && 11787 Subtarget.hasStdExtZfbfmin()) { 11788 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 11789 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 11790 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 11791 Subtarget.hasStdExtFOrZfinx()) { 11792 SDValue FPConv = 11793 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 11794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 11795 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 && 11796 Subtarget.hasStdExtZfa()) { 11797 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, 11798 DAG.getVTList(MVT::i32, MVT::i32), Op0); 11799 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 11800 NewReg.getValue(0), NewReg.getValue(1)); 11801 Results.push_back(RetReg); 11802 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 11803 isTypeLegal(Op0VT)) { 11804 // Custom-legalize bitcasts from fixed-length vector types to illegal 11805 // scalar types in order to improve codegen. Bitcast the vector to a 11806 // one-element vector type whose element type is the same as the result 11807 // type, and extract the first element. 11808 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 11809 if (isTypeLegal(BVT)) { 11810 SDValue BVec = DAG.getBitcast(BVT, Op0); 11811 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 11812 DAG.getConstant(0, DL, XLenVT))); 11813 } 11814 } 11815 break; 11816 } 11817 case RISCVISD::BREV8: { 11818 MVT VT = N->getSimpleValueType(0); 11819 MVT XLenVT = Subtarget.getXLenVT(); 11820 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && 11821 "Unexpected custom legalisation"); 11822 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 11823 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 11824 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); 11825 // ReplaceNodeResults requires we maintain the same type for the return 11826 // value. 11827 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); 11828 break; 11829 } 11830 case ISD::EXTRACT_VECTOR_ELT: { 11831 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 11832 // type is illegal (currently only vXi64 RV32). 11833 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 11834 // transferred to the destination register. We issue two of these from the 11835 // upper- and lower- halves of the SEW-bit vector element, slid down to the 11836 // first element. 11837 SDValue Vec = N->getOperand(0); 11838 SDValue Idx = N->getOperand(1); 11839 11840 // The vector type hasn't been legalized yet so we can't issue target 11841 // specific nodes if it needs legalization. 
11842 // FIXME: We would manually legalize if it's important. 11843 if (!isTypeLegal(Vec.getValueType())) 11844 return; 11845 11846 MVT VecVT = Vec.getSimpleValueType(); 11847 11848 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 11849 VecVT.getVectorElementType() == MVT::i64 && 11850 "Unexpected EXTRACT_VECTOR_ELT legalization"); 11851 11852 // If this is a fixed vector, we need to convert it to a scalable vector. 11853 MVT ContainerVT = VecVT; 11854 if (VecVT.isFixedLengthVector()) { 11855 ContainerVT = getContainerForFixedLengthVector(VecVT); 11856 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 11857 } 11858 11859 MVT XLenVT = Subtarget.getXLenVT(); 11860 11861 // Use a VL of 1 to avoid processing more elements than we need. 11862 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 11863 11864 // Unless the index is known to be 0, we must slide the vector down to get 11865 // the desired element into index 0. 11866 if (!isNullConstant(Idx)) { 11867 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 11868 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 11869 } 11870 11871 // Extract the lower XLEN bits of the correct vector element. 11872 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 11873 11874 // To extract the upper XLEN bits of the vector element, shift the first 11875 // element right by 32 bits and re-extract the lower XLEN bits. 11876 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 11877 DAG.getUNDEF(ContainerVT), 11878 DAG.getConstant(32, DL, XLenVT), VL); 11879 SDValue LShr32 = 11880 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, 11881 DAG.getUNDEF(ContainerVT), Mask, VL); 11882 11883 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 11884 11885 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 11886 break; 11887 } 11888 case ISD::INTRINSIC_WO_CHAIN: { 11889 unsigned IntNo = N->getConstantOperandVal(0); 11890 switch (IntNo) { 11891 default: 11892 llvm_unreachable( 11893 "Don't know how to custom type legalize this intrinsic!"); 11894 case Intrinsic::experimental_get_vector_length: { 11895 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); 11896 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11897 return; 11898 } 11899 case Intrinsic::riscv_orc_b: 11900 case Intrinsic::riscv_brev8: 11901 case Intrinsic::riscv_sha256sig0: 11902 case Intrinsic::riscv_sha256sig1: 11903 case Intrinsic::riscv_sha256sum0: 11904 case Intrinsic::riscv_sha256sum1: 11905 case Intrinsic::riscv_sm3p0: 11906 case Intrinsic::riscv_sm3p1: { 11907 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11908 return; 11909 unsigned Opc; 11910 switch (IntNo) { 11911 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; 11912 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; 11913 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; 11914 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; 11915 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; 11916 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; 11917 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; 11918 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; 11919 } 11920 11921 SDValue NewOp = 11922 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11923 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); 11924 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, 
MVT::i32, Res)); 11925 return; 11926 } 11927 case Intrinsic::riscv_sm4ks: 11928 case Intrinsic::riscv_sm4ed: { 11929 unsigned Opc = 11930 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; 11931 SDValue NewOp0 = 11932 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11933 SDValue NewOp1 = 11934 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11935 SDValue Res = 11936 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); 11937 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11938 return; 11939 } 11940 case Intrinsic::riscv_clmul: { 11941 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11942 return; 11943 11944 SDValue NewOp0 = 11945 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11946 SDValue NewOp1 = 11947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11948 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); 11949 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11950 return; 11951 } 11952 case Intrinsic::riscv_clmulh: 11953 case Intrinsic::riscv_clmulr: { 11954 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) 11955 return; 11956 11957 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 11958 // to the full 128-bit clmul result of multiplying two xlen values. 11959 // Perform clmulr or clmulh on the shifted values. Finally, extract the 11960 // upper 32 bits. 11961 // 11962 // The alternative is to mask the inputs to 32 bits and use clmul, but 11963 // that requires two shifts to mask each input without zext.w. 11964 // FIXME: If the inputs are known zero extended or could be freely 11965 // zero extended, the mask form would be better. 11966 SDValue NewOp0 = 11967 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 11968 SDValue NewOp1 = 11969 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 11970 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, 11971 DAG.getConstant(32, DL, MVT::i64)); 11972 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, 11973 DAG.getConstant(32, DL, MVT::i64)); 11974 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH 11975 : RISCVISD::CLMULR; 11976 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); 11977 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, 11978 DAG.getConstant(32, DL, MVT::i64)); 11979 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 11980 return; 11981 } 11982 case Intrinsic::riscv_vmv_x_s: { 11983 EVT VT = N->getValueType(0); 11984 MVT XLenVT = Subtarget.getXLenVT(); 11985 if (VT.bitsLT(XLenVT)) { 11986 // Simple case just extract using vmv.x.s and truncate. 11987 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 11988 Subtarget.getXLenVT(), N->getOperand(1)); 11989 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 11990 return; 11991 } 11992 11993 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 11994 "Unexpected custom legalization"); 11995 11996 // We need to do the move in two steps. 11997 SDValue Vec = N->getOperand(1); 11998 MVT VecVT = Vec.getSimpleValueType(); 11999 12000 // First extract the lower XLEN bits of the element. 12001 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 12002 12003 // To extract the upper XLEN bits of the vector element, shift the first 12004 // element right by 32 bits and re-extract the lower XLEN bits. 
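// For example (illustrative values): if the element is 0x1234567890ABCDEF on
// RV32, the first VMV_X_S yields EltLo = 0x90ABCDEF, the vector shift right
// by 32 followed by a second VMV_X_S yields EltHi = 0x12345678, and
// BUILD_PAIR reassembles the i64 result.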
12005 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget); 12006 12007 SDValue ThirtyTwoV = 12008 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 12009 DAG.getConstant(32, DL, XLenVT), VL); 12010 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, 12011 DAG.getUNDEF(VecVT), Mask, VL); 12012 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 12013 12014 Results.push_back( 12015 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 12016 break; 12017 } 12018 } 12019 break; 12020 } 12021 case ISD::VECREDUCE_ADD: 12022 case ISD::VECREDUCE_AND: 12023 case ISD::VECREDUCE_OR: 12024 case ISD::VECREDUCE_XOR: 12025 case ISD::VECREDUCE_SMAX: 12026 case ISD::VECREDUCE_UMAX: 12027 case ISD::VECREDUCE_SMIN: 12028 case ISD::VECREDUCE_UMIN: 12029 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 12030 Results.push_back(V); 12031 break; 12032 case ISD::VP_REDUCE_ADD: 12033 case ISD::VP_REDUCE_AND: 12034 case ISD::VP_REDUCE_OR: 12035 case ISD::VP_REDUCE_XOR: 12036 case ISD::VP_REDUCE_SMAX: 12037 case ISD::VP_REDUCE_UMAX: 12038 case ISD::VP_REDUCE_SMIN: 12039 case ISD::VP_REDUCE_UMIN: 12040 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) 12041 Results.push_back(V); 12042 break; 12043 case ISD::GET_ROUNDING: { 12044 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); 12045 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0)); 12046 Results.push_back(Res.getValue(0)); 12047 Results.push_back(Res.getValue(1)); 12048 break; 12049 } 12050 } 12051} 12052 12053/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP 12054/// which corresponds to it. 12055static unsigned getVecReduceOpcode(unsigned Opc) { 12056 switch (Opc) { 12057 default: 12058 llvm_unreachable("Unhandled binary to transfrom reduction"); 12059 case ISD::ADD: 12060 return ISD::VECREDUCE_ADD; 12061 case ISD::UMAX: 12062 return ISD::VECREDUCE_UMAX; 12063 case ISD::SMAX: 12064 return ISD::VECREDUCE_SMAX; 12065 case ISD::UMIN: 12066 return ISD::VECREDUCE_UMIN; 12067 case ISD::SMIN: 12068 return ISD::VECREDUCE_SMIN; 12069 case ISD::AND: 12070 return ISD::VECREDUCE_AND; 12071 case ISD::OR: 12072 return ISD::VECREDUCE_OR; 12073 case ISD::XOR: 12074 return ISD::VECREDUCE_XOR; 12075 case ISD::FADD: 12076 // Note: This is the associative form of the generic reduction opcode. 12077 return ISD::VECREDUCE_FADD; 12078 } 12079} 12080 12081/// Perform two related transforms whose purpose is to incrementally recognize 12082/// an explode_vector followed by scalar reduction as a vector reduction node. 12083/// This exists to recover from a deficiency in SLP which can't handle 12084/// forests with multiple roots sharing common nodes. In some cases, one 12085/// of the trees will be vectorized, and the other will remain (unprofitably) 12086/// scalarized. 12087static SDValue 12088combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, 12089 const RISCVSubtarget &Subtarget) { 12090 12091 // This transforms need to run before all integer types have been legalized 12092 // to i64 (so that the vector element type matches the add type), and while 12093 // it's safe to introduce odd sized vector types. 12094 if (DAG.NewNodesMustHaveLegalTypes) 12095 return SDValue(); 12096 12097 // Without V, this transform isn't useful. We could form the (illegal) 12098 // operations and let them be scalarized again, but there's really no point. 
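// Illustrative sketch of the two patterns matched below: a fully scalarized
// sum such as
//   (add (add (extractelt v, 0), (extractelt v, 1)), (extractelt v, 2))
// first has its innermost add rewritten to
//   (vecreduce.add (extract_subvector v, elements [0,1]))
// and the second pattern then grows that reduction one element at a time
// until it covers the whole source vector.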
12099 if (!Subtarget.hasVInstructions()) 12100 return SDValue(); 12101 12102 const SDLoc DL(N); 12103 const EVT VT = N->getValueType(0); 12104 const unsigned Opc = N->getOpcode(); 12105 12106 // For FADD, we only handle the case with reassociation allowed. We 12107 // could handle strict reduction order, but at the moment, there's no 12108 // known reason to, and the complexity isn't worth it. 12109 // TODO: Handle fminnum and fmaxnum here 12110 if (!VT.isInteger() && 12111 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) 12112 return SDValue(); 12113 12114 const unsigned ReduceOpc = getVecReduceOpcode(Opc); 12115 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && 12116 "Inconsistent mappings"); 12117 SDValue LHS = N->getOperand(0); 12118 SDValue RHS = N->getOperand(1); 12119 12120 if (!LHS.hasOneUse() || !RHS.hasOneUse()) 12121 return SDValue(); 12122 12123 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 12124 std::swap(LHS, RHS); 12125 12126 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12127 !isa<ConstantSDNode>(RHS.getOperand(1))) 12128 return SDValue(); 12129 12130 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue(); 12131 SDValue SrcVec = RHS.getOperand(0); 12132 EVT SrcVecVT = SrcVec.getValueType(); 12133 assert(SrcVecVT.getVectorElementType() == VT); 12134 if (SrcVecVT.isScalableVector()) 12135 return SDValue(); 12136 12137 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) 12138 return SDValue(); 12139 12140 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to 12141 // reduce_op (extract_subvector [2 x VT] from V). This will form the 12142 // root of our reduction tree. TODO: We could extend this to any two 12143 // adjacent aligned constant indices if desired. 12144 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 12145 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) { 12146 uint64_t LHSIdx = 12147 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue(); 12148 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) { 12149 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2); 12150 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 12151 DAG.getVectorIdxConstant(0, DL)); 12152 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags()); 12153 } 12154 } 12155 12156 // Match (binop (reduce (extract_subvector V, 0), 12157 // (extract_vector_elt V, sizeof(SubVec)))) 12158 // into a reduction of one more element from the original vector V. 12159 if (LHS.getOpcode() != ReduceOpc) 12160 return SDValue(); 12161 12162 SDValue ReduceVec = LHS.getOperand(0); 12163 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && 12164 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) && 12165 isNullConstant(ReduceVec.getOperand(1)) && 12166 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { 12167 // For illegal types (e.g. 3xi32), most will be combined again into a 12168 // wider (hopefully legal) type. If this is a terminal state, we are 12169 // relying on type legalization here to produce something reasonable 12170 // and this lowering quality could probably be improved. 
(TODO) 12171 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1); 12172 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, 12173 DAG.getVectorIdxConstant(0, DL)); 12174 auto Flags = ReduceVec->getFlags(); 12175 Flags.intersectWith(N->getFlags()); 12176 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags); 12177 } 12178 12179 return SDValue(); 12180} 12181 12182 12183// Try to fold (<bop> x, (reduction.<bop> vec, start)) 12184static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, 12185 const RISCVSubtarget &Subtarget) { 12186 auto BinOpToRVVReduce = [](unsigned Opc) { 12187 switch (Opc) { 12188 default: 12189 llvm_unreachable("Unhandled binary to transfrom reduction"); 12190 case ISD::ADD: 12191 return RISCVISD::VECREDUCE_ADD_VL; 12192 case ISD::UMAX: 12193 return RISCVISD::VECREDUCE_UMAX_VL; 12194 case ISD::SMAX: 12195 return RISCVISD::VECREDUCE_SMAX_VL; 12196 case ISD::UMIN: 12197 return RISCVISD::VECREDUCE_UMIN_VL; 12198 case ISD::SMIN: 12199 return RISCVISD::VECREDUCE_SMIN_VL; 12200 case ISD::AND: 12201 return RISCVISD::VECREDUCE_AND_VL; 12202 case ISD::OR: 12203 return RISCVISD::VECREDUCE_OR_VL; 12204 case ISD::XOR: 12205 return RISCVISD::VECREDUCE_XOR_VL; 12206 case ISD::FADD: 12207 return RISCVISD::VECREDUCE_FADD_VL; 12208 case ISD::FMAXNUM: 12209 return RISCVISD::VECREDUCE_FMAX_VL; 12210 case ISD::FMINNUM: 12211 return RISCVISD::VECREDUCE_FMIN_VL; 12212 } 12213 }; 12214 12215 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { 12216 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 12217 isNullConstant(V.getOperand(1)) && 12218 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); 12219 }; 12220 12221 unsigned Opc = N->getOpcode(); 12222 unsigned ReduceIdx; 12223 if (IsReduction(N->getOperand(0), Opc)) 12224 ReduceIdx = 0; 12225 else if (IsReduction(N->getOperand(1), Opc)) 12226 ReduceIdx = 1; 12227 else 12228 return SDValue(); 12229 12230 // Skip if FADD disallows reassociation but the combiner needs. 12231 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) 12232 return SDValue(); 12233 12234 SDValue Extract = N->getOperand(ReduceIdx); 12235 SDValue Reduce = Extract.getOperand(0); 12236 if (!Extract.hasOneUse() || !Reduce.hasOneUse()) 12237 return SDValue(); 12238 12239 SDValue ScalarV = Reduce.getOperand(2); 12240 EVT ScalarVT = ScalarV.getValueType(); 12241 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && 12242 ScalarV.getOperand(0)->isUndef() && 12243 isNullConstant(ScalarV.getOperand(2))) 12244 ScalarV = ScalarV.getOperand(1); 12245 12246 // Make sure that ScalarV is a splat with VL=1. 12247 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && 12248 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && 12249 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) 12250 return SDValue(); 12251 12252 if (!isNonZeroAVL(ScalarV.getOperand(2))) 12253 return SDValue(); 12254 12255 // Check the scalar of ScalarV is neutral element 12256 // TODO: Deal with value other than neutral element. 12257 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), 12258 0)) 12259 return SDValue(); 12260 12261 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 12262 // FIXME: We might be able to improve this if operand 0 is undef. 
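// Roughly (illustrative; the exact operand order follows the code), this
// combine rewrites
//   (add X, (extractelt (VECREDUCE_ADD_VL vec, ..., start = 0, ...), 0))
// into
//   (extractelt (VECREDUCE_ADD_VL vec, ..., start = X, ...), 0)
// so X replaces the neutral start value and the scalar add disappears.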
12263 if (!isNonZeroAVL(Reduce.getOperand(5))) 12264 return SDValue(); 12265 12266 SDValue NewStart = N->getOperand(1 - ReduceIdx); 12267 12268 SDLoc DL(N); 12269 SDValue NewScalarV = 12270 lowerScalarInsert(NewStart, ScalarV.getOperand(2), 12271 ScalarV.getSimpleValueType(), DL, DAG, Subtarget); 12272 12273 // If we looked through an INSERT_SUBVECTOR we need to restore it. 12274 if (ScalarVT != ScalarV.getValueType()) 12275 NewScalarV = 12276 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), 12277 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 12278 12279 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1), 12280 NewScalarV, Reduce.getOperand(3), 12281 Reduce.getOperand(4), Reduce.getOperand(5)}; 12282 SDValue NewReduce = 12283 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops); 12284 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, 12285 Extract.getOperand(1)); 12286} 12287 12288// Optimize (add (shl x, c0), (shl y, c1)) -> 12289// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 12290static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 12291 const RISCVSubtarget &Subtarget) { 12292 // Perform this optimization only in the zba extension. 12293 if (!Subtarget.hasStdExtZba()) 12294 return SDValue(); 12295 12296 // Skip for vector types and larger types. 12297 EVT VT = N->getValueType(0); 12298 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 12299 return SDValue(); 12300 12301 // The two operand nodes must be SHL and have no other use. 12302 SDValue N0 = N->getOperand(0); 12303 SDValue N1 = N->getOperand(1); 12304 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 12305 !N0->hasOneUse() || !N1->hasOneUse()) 12306 return SDValue(); 12307 12308 // Check c0 and c1. 12309 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 12310 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 12311 if (!N0C || !N1C) 12312 return SDValue(); 12313 int64_t C0 = N0C->getSExtValue(); 12314 int64_t C1 = N1C->getSExtValue(); 12315 if (C0 <= 0 || C1 <= 0) 12316 return SDValue(); 12317 12318 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 12319 int64_t Bits = std::min(C0, C1); 12320 int64_t Diff = std::abs(C0 - C1); 12321 if (Diff != 1 && Diff != 2 && Diff != 3) 12322 return SDValue(); 12323 12324 // Build nodes. 12325 SDLoc DL(N); 12326 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 12327 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); 12328 SDValue NA0 = 12329 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); 12330 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); 12331 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); 12332} 12333 12334// Combine a constant select operand into its use: 12335// 12336// (and (select cond, -1, c), x) 12337// -> (select cond, x, (and x, c)) [AllOnes=1] 12338// (or (select cond, 0, c), x) 12339// -> (select cond, x, (or x, c)) [AllOnes=0] 12340// (xor (select cond, 0, c), x) 12341// -> (select cond, x, (xor x, c)) [AllOnes=0] 12342// (add (select cond, 0, c), x) 12343// -> (select cond, x, (add x, c)) [AllOnes=0] 12344// (sub x, (select cond, 0, c)) 12345// -> (select cond, x, (sub x, c)) [AllOnes=0] 12346static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 12347 SelectionDAG &DAG, bool AllOnes, 12348 const RISCVSubtarget &Subtarget) { 12349 EVT VT = N->getValueType(0); 12350 12351 // Skip vectors. 
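// For example (illustrative instance of the patterns above),
// (add (select cond, 0, 42), x) becomes (select cond, x, (add x, 42)): when
// the select yields the identity constant the result is simply x, so the
// binary op only survives in the other arm.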
12352 if (VT.isVector()) 12353 return SDValue(); 12354 12355 if (!Subtarget.hasConditionalMoveFusion()) { 12356 // (select cond, x, (and x, c)) has custom lowering with Zicond. 12357 if ((!Subtarget.hasStdExtZicond() && 12358 !Subtarget.hasVendorXVentanaCondOps()) || 12359 N->getOpcode() != ISD::AND) 12360 return SDValue(); 12361 12362 // Maybe harmful when condition code has multiple use. 12363 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse()) 12364 return SDValue(); 12365 12366 // Maybe harmful when VT is wider than XLen. 12367 if (VT.getSizeInBits() > Subtarget.getXLen()) 12368 return SDValue(); 12369 } 12370 12371 if ((Slct.getOpcode() != ISD::SELECT && 12372 Slct.getOpcode() != RISCVISD::SELECT_CC) || 12373 !Slct.hasOneUse()) 12374 return SDValue(); 12375 12376 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { 12377 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); 12378 }; 12379 12380 bool SwapSelectOps; 12381 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; 12382 SDValue TrueVal = Slct.getOperand(1 + OpOffset); 12383 SDValue FalseVal = Slct.getOperand(2 + OpOffset); 12384 SDValue NonConstantVal; 12385 if (isZeroOrAllOnes(TrueVal, AllOnes)) { 12386 SwapSelectOps = false; 12387 NonConstantVal = FalseVal; 12388 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { 12389 SwapSelectOps = true; 12390 NonConstantVal = TrueVal; 12391 } else 12392 return SDValue(); 12393 12394 // Slct is now know to be the desired identity constant when CC is true. 12395 TrueVal = OtherOp; 12396 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); 12397 // Unless SwapSelectOps says the condition should be false. 12398 if (SwapSelectOps) 12399 std::swap(TrueVal, FalseVal); 12400 12401 if (Slct.getOpcode() == RISCVISD::SELECT_CC) 12402 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, 12403 {Slct.getOperand(0), Slct.getOperand(1), 12404 Slct.getOperand(2), TrueVal, FalseVal}); 12405 12406 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 12407 {Slct.getOperand(0), TrueVal, FalseVal}); 12408} 12409 12410// Attempt combineSelectAndUse on each operand of a commutative operator N. 12411static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, 12412 bool AllOnes, 12413 const RISCVSubtarget &Subtarget) { 12414 SDValue N0 = N->getOperand(0); 12415 SDValue N1 = N->getOperand(1); 12416 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget)) 12417 return Result; 12418 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget)) 12419 return Result; 12420 return SDValue(); 12421} 12422 12423// Transform (add (mul x, c0), c1) -> 12424// (add (mul (add x, c1/c0), c0), c1%c0). 12425// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 12426// that should be excluded is when c0*(c1/c0) is simm12, which will lead 12427// to an infinite loop in DAGCombine if transformed. 12428// Or transform (add (mul x, c0), c1) -> 12429// (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 12430// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 12431// case that should be excluded is when c0*(c1/c0+1) is simm12, which will 12432// lead to an infinite loop in DAGCombine if transformed. 12433// Or transform (add (mul x, c0), c1) -> 12434// (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 12435// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. 
A special corner 12436// case that should be excluded is when c0*(c1/c0-1) is simm12, which will 12437// lead to an infinite loop in DAGCombine if transformed. 12438// Or transform (add (mul x, c0), c1) -> 12439// (mul (add x, c1/c0), c0). 12440// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 12441static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 12442 const RISCVSubtarget &Subtarget) { 12443 // Skip for vector types and larger types. 12444 EVT VT = N->getValueType(0); 12445 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 12446 return SDValue(); 12447 // The first operand node must be a MUL and has no other use. 12448 SDValue N0 = N->getOperand(0); 12449 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 12450 return SDValue(); 12451 // Check if c0 and c1 match above conditions. 12452 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 12453 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12454 if (!N0C || !N1C) 12455 return SDValue(); 12456 // If N0C has multiple uses it's possible one of the cases in 12457 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result 12458 // in an infinite loop. 12459 if (!N0C->hasOneUse()) 12460 return SDValue(); 12461 int64_t C0 = N0C->getSExtValue(); 12462 int64_t C1 = N1C->getSExtValue(); 12463 int64_t CA, CB; 12464 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 12465 return SDValue(); 12466 // Search for proper CA (non-zero) and CB that both are simm12. 12467 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 12468 !isInt<12>(C0 * (C1 / C0))) { 12469 CA = C1 / C0; 12470 CB = C1 % C0; 12471 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 12472 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 12473 CA = C1 / C0 + 1; 12474 CB = C1 % C0 - C0; 12475 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 12476 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 12477 CA = C1 / C0 - 1; 12478 CB = C1 % C0 + C0; 12479 } else 12480 return SDValue(); 12481 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 12482 SDLoc DL(N); 12483 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 12484 DAG.getConstant(CA, DL, VT)); 12485 SDValue New1 = 12486 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 12487 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 12488} 12489 12490// Try to turn (add (xor bool, 1) -1) into (neg bool). 12491static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { 12492 SDValue N0 = N->getOperand(0); 12493 SDValue N1 = N->getOperand(1); 12494 EVT VT = N->getValueType(0); 12495 SDLoc DL(N); 12496 12497 // RHS should be -1. 12498 if (!isAllOnesConstant(N1)) 12499 return SDValue(); 12500 12501 // Look for (xor X, 1). 12502 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1))) 12503 return SDValue(); 12504 12505 // First xor input should be 0 or 1. 12506 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 12507 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask)) 12508 return SDValue(); 12509 12510 // Emit a negate of the setcc. 
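// For example (illustrative), (add (xor bool, 1), -1) with bool known to be
// 0 or 1 becomes (sub 0, bool), since (1 - bool) - 1 == -bool.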
12511 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), 12512 N0.getOperand(0)); 12513} 12514 12515static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, 12516 const RISCVSubtarget &Subtarget) { 12517 if (SDValue V = combineAddOfBooleanXor(N, DAG)) 12518 return V; 12519 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 12520 return V; 12521 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 12522 return V; 12523 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12524 return V; 12525 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12526 return V; 12527 12528 // fold (add (select lhs, rhs, cc, 0, y), x) -> 12529 // (select lhs, rhs, cc, x, (add x, y)) 12530 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 12531} 12532 12533// Try to turn a sub boolean RHS and constant LHS into an addi. 12534static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { 12535 SDValue N0 = N->getOperand(0); 12536 SDValue N1 = N->getOperand(1); 12537 EVT VT = N->getValueType(0); 12538 SDLoc DL(N); 12539 12540 // Require a constant LHS. 12541 auto *N0C = dyn_cast<ConstantSDNode>(N0); 12542 if (!N0C) 12543 return SDValue(); 12544 12545 // All our optimizations involve subtracting 1 from the immediate and forming 12546 // an ADDI. Make sure the new immediate is valid for an ADDI. 12547 APInt ImmValMinus1 = N0C->getAPIntValue() - 1; 12548 if (!ImmValMinus1.isSignedIntN(12)) 12549 return SDValue(); 12550 12551 SDValue NewLHS; 12552 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { 12553 // (sub constant, (setcc x, y, eq/neq)) -> 12554 // (add (setcc x, y, neq/eq), constant - 1) 12555 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 12556 EVT SetCCOpVT = N1.getOperand(0).getValueType(); 12557 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) 12558 return SDValue(); 12559 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 12560 NewLHS = 12561 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); 12562 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && 12563 N1.getOperand(0).getOpcode() == ISD::SETCC) { 12564 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 12565 // Since setcc returns a bool the xor is equivalent to 1-setcc. 
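// Worked instances (illustrative): (sub 100, (seteq x, y)) becomes
// (add (setne x, y), 99), and (sub 100, (xor (setcc x, y, cc), 1)) becomes
// (add (setcc x, y, cc), 99); in both cases the immediate 99 fits an ADDI.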
12566 NewLHS = N1.getOperand(0); 12567 } else 12568 return SDValue(); 12569 12570 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); 12571 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); 12572} 12573 12574static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, 12575 const RISCVSubtarget &Subtarget) { 12576 if (SDValue V = combineSubOfBoolean(N, DAG)) 12577 return V; 12578 12579 SDValue N0 = N->getOperand(0); 12580 SDValue N1 = N->getOperand(1); 12581 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 12582 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && 12583 isNullConstant(N1.getOperand(1))) { 12584 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 12585 if (CCVal == ISD::SETLT) { 12586 EVT VT = N->getValueType(0); 12587 SDLoc DL(N); 12588 unsigned ShAmt = N0.getValueSizeInBits() - 1; 12589 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), 12590 DAG.getConstant(ShAmt, DL, VT)); 12591 } 12592 } 12593 12594 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 12595 // (select lhs, rhs, cc, x, (sub x, y)) 12596 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); 12597} 12598 12599// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 12600// Legalizing setcc can introduce xors like this. Doing this transform reduces 12601// the number of xors and may allow the xor to fold into a branch condition. 12602static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { 12603 SDValue N0 = N->getOperand(0); 12604 SDValue N1 = N->getOperand(1); 12605 bool IsAnd = N->getOpcode() == ISD::AND; 12606 12607 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) 12608 return SDValue(); 12609 12610 if (!N0.hasOneUse() || !N1.hasOneUse()) 12611 return SDValue(); 12612 12613 SDValue N01 = N0.getOperand(1); 12614 SDValue N11 = N1.getOperand(1); 12615 12616 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 12617 // (xor X, -1) based on the upper bits of the other operand being 0. If the 12618 // operation is And, allow one of the Xors to use -1. 12619 if (isOneConstant(N01)) { 12620 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) 12621 return SDValue(); 12622 } else if (isOneConstant(N11)) { 12623 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 12624 if (!(IsAnd && isAllOnesConstant(N01))) 12625 return SDValue(); 12626 } else 12627 return SDValue(); 12628 12629 EVT VT = N->getValueType(0); 12630 12631 SDValue N00 = N0.getOperand(0); 12632 SDValue N10 = N1.getOperand(0); 12633 12634 // The LHS of the xors needs to be 0/1. 12635 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 12636 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) 12637 return SDValue(); 12638 12639 // Invert the opcode and insert a new xor. 12640 SDLoc DL(N); 12641 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 12642 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); 12643 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); 12644} 12645 12646static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, 12647 const RISCVSubtarget &Subtarget) { 12648 SDValue N0 = N->getOperand(0); 12649 EVT VT = N->getValueType(0); 12650 12651 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 12652 // extending X. This is safe since we only need the LSB after the shift and 12653 // shift amounts larger than 31 would produce poison. 
If we wait until 12654 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 12655 // to use a BEXT instruction. 12656 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && 12657 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && 12658 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { 12659 SDLoc DL(N0); 12660 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 12661 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 12662 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 12663 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl); 12664 } 12665 12666 return SDValue(); 12667 } 12668 12669 // Combines two comparison operations and a logic operation into one selection 12670 // operation (min, max) and a logic operation. Returns the newly constructed node 12671 // if the conditions for the optimization are satisfied. 12672 static SDValue performANDCombine(SDNode *N, 12673 TargetLowering::DAGCombinerInfo &DCI, 12674 const RISCVSubtarget &Subtarget) { 12675 SelectionDAG &DAG = DCI.DAG; 12676 12677 SDValue N0 = N->getOperand(0); 12678 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 12679 // extending X. This is safe since we only need the LSB after the shift and 12680 // shift amounts larger than 31 would produce poison. If we wait until 12681 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 12682 // to use a BEXT instruction. 12683 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && 12684 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && 12685 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && 12686 N0.hasOneUse()) { 12687 SDLoc DL(N); 12688 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 12689 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 12690 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 12691 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, 12692 DAG.getConstant(1, DL, MVT::i64)); 12693 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); 12694 } 12695 12696 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12697 return V; 12698 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12699 return V; 12700 12701 if (DCI.isAfterLegalizeDAG()) 12702 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 12703 return V; 12704 12705 // fold (and (select lhs, rhs, cc, -1, y), x) -> 12706 // (select lhs, rhs, cc, x, (and x, y)) 12707 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); 12708 } 12709 12710 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. 12711 // FIXME: Generalize to other binary operators with the same operand. 12712 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, 12713 SelectionDAG &DAG) { 12714 assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); 12715 12716 if (N0.getOpcode() != RISCVISD::CZERO_EQZ || 12717 N1.getOpcode() != RISCVISD::CZERO_NEZ || 12718 !N0.hasOneUse() || !N1.hasOneUse()) 12719 return SDValue(); 12720 12721 // Should have the same condition.
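// Illustrative sketch of the rewrite performed here:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//     -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// i.e. the common xor-with-1 is pulled out of both arms of the select idiom.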
12722 SDValue Cond = N0.getOperand(1); 12723 if (Cond != N1.getOperand(1)) 12724 return SDValue(); 12725 12726 SDValue TrueV = N0.getOperand(0); 12727 SDValue FalseV = N1.getOperand(0); 12728 12729 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR || 12730 TrueV.getOperand(1) != FalseV.getOperand(1) || 12731 !isOneConstant(TrueV.getOperand(1)) || 12732 !TrueV.hasOneUse() || !FalseV.hasOneUse()) 12733 return SDValue(); 12734 12735 EVT VT = N->getValueType(0); 12736 SDLoc DL(N); 12737 12738 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), 12739 Cond); 12740 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), 12741 Cond); 12742 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); 12743 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); 12744 } 12745 12746 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, 12747 const RISCVSubtarget &Subtarget) { 12748 SelectionDAG &DAG = DCI.DAG; 12749 12750 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12751 return V; 12752 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12753 return V; 12754 12755 if (DCI.isAfterLegalizeDAG()) 12756 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 12757 return V; 12758 12759 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom. 12760 // We may be able to pull a common operation out of the true and false value. 12761 SDValue N0 = N->getOperand(0); 12762 SDValue N1 = N->getOperand(1); 12763 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG)) 12764 return V; 12765 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG)) 12766 return V; 12767 12768 // fold (or (select cond, 0, y), x) -> 12769 // (select cond, x, (or x, y)) 12770 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 12771 } 12772 12773 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 12774 const RISCVSubtarget &Subtarget) { 12775 SDValue N0 = N->getOperand(0); 12776 SDValue N1 = N->getOperand(1); 12777 12778 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use 12779 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create 12780 // RISCVISD::SLLW and we can't recover it to use a BSET instruction. 12781 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && 12782 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) && 12783 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) && 12784 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { 12785 SDLoc DL(N); 12786 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 12787 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 12788 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); 12789 SDValue And = DAG.getNOT(DL, Shl, MVT::i64); 12790 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); 12791 } 12792 12793 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) 12794 // NOTE: Assumes ROL being legal means ROLW is legal.
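// Illustrative check: for x = 3, (xor (sllw 1, x), -1) is 0xfffffff7, and
// rotating ~1 (0xfffffffe) left by 3 also leaves only bit 3 clear, so the two
// forms produce the same mask.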
12795 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 12796 if (N0.getOpcode() == RISCVISD::SLLW && 12797 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && 12798 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { 12799 SDLoc DL(N); 12800 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, 12801 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); 12802 } 12803 12804 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 12805 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { 12806 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 12807 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 12808 if (ConstN00 && CC == ISD::SETLT) { 12809 EVT VT = N0.getValueType(); 12810 SDLoc DL(N0); 12811 const APInt &Imm = ConstN00->getAPIntValue(); 12812 if ((Imm + 1).isSignedIntN(12)) 12813 return DAG.getSetCC(DL, VT, N0.getOperand(1), 12814 DAG.getConstant(Imm + 1, DL, VT), CC); 12815 } 12816 } 12817 12818 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 12819 return V; 12820 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 12821 return V; 12822 12823 // fold (xor (select cond, 0, y), x) -> 12824 // (select cond, x, (xor x, y)) 12825 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 12826} 12827 12828static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) { 12829 EVT VT = N->getValueType(0); 12830 if (!VT.isVector()) 12831 return SDValue(); 12832 12833 SDLoc DL(N); 12834 SDValue N0 = N->getOperand(0); 12835 SDValue N1 = N->getOperand(1); 12836 SDValue MulOper; 12837 unsigned AddSubOpc; 12838 12839 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) 12840 // (mul x, add (y, 1)) -> (add x, (mul x, y)) 12841 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) 12842 // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) 12843 auto IsAddSubWith1 = [&](SDValue V) -> bool { 12844 AddSubOpc = V->getOpcode(); 12845 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { 12846 SDValue Opnd = V->getOperand(1); 12847 MulOper = V->getOperand(0); 12848 if (AddSubOpc == ISD::SUB) 12849 std::swap(Opnd, MulOper); 12850 if (isOneOrOneSplat(Opnd)) 12851 return true; 12852 } 12853 return false; 12854 }; 12855 12856 if (IsAddSubWith1(N0)) { 12857 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); 12858 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); 12859 } 12860 12861 if (IsAddSubWith1(N1)) { 12862 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); 12863 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); 12864 } 12865 12866 return SDValue(); 12867} 12868 12869/// According to the property that indexed load/store instructions zero-extend 12870/// their indices, try to narrow the type of index operand. 12871static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { 12872 if (isIndexTypeSigned(IndexType)) 12873 return false; 12874 12875 if (!N->hasOneUse()) 12876 return false; 12877 12878 EVT VT = N.getValueType(); 12879 SDLoc DL(N); 12880 12881 // In general, what we're doing here is seeing if we can sink a truncate to 12882 // a smaller element type into the expression tree building our index. 12883 // TODO: We can generalize this and handle a bunch more cases if useful. 12884 12885 // Narrow a buildvector to the narrowest element type. This requires less 12886 // work and less register pressure at high LMUL, and creates smaller constants 12887 // which may be cheaper to materialize. 
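// For example (illustrative), a constant index build_vector such as
// <i64 0, i64 8, i64 16, i64 24> needs no more than 8 active bits, so it is
// truncated to the equivalent <4 x i8> vector; the indexed memory operation
// zero-extends the indices anyway.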
12888 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) { 12889 KnownBits Known = DAG.computeKnownBits(N); 12890 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits()); 12891 LLVMContext &C = *DAG.getContext(); 12892 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C); 12893 if (ResultVT.bitsLT(VT.getVectorElementType())) { 12894 N = DAG.getNode(ISD::TRUNCATE, DL, 12895 VT.changeVectorElementType(ResultVT), N); 12896 return true; 12897 } 12898 } 12899 12900 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). 12901 if (N.getOpcode() != ISD::SHL) 12902 return false; 12903 12904 SDValue N0 = N.getOperand(0); 12905 if (N0.getOpcode() != ISD::ZERO_EXTEND && 12906 N0.getOpcode() != RISCVISD::VZEXT_VL) 12907 return false; 12908 if (!N0->hasOneUse()) 12909 return false; 12910 12911 APInt ShAmt; 12912 SDValue N1 = N.getOperand(1); 12913 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) 12914 return false; 12915 12916 SDValue Src = N0.getOperand(0); 12917 EVT SrcVT = Src.getValueType(); 12918 unsigned SrcElen = SrcVT.getScalarSizeInBits(); 12919 unsigned ShAmtV = ShAmt.getZExtValue(); 12920 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); 12921 NewElen = std::max(NewElen, 8U); 12922 12923 // Skip if NewElen is not narrower than the original extended type. 12924 if (NewElen >= N0.getValueType().getScalarSizeInBits()) 12925 return false; 12926 12927 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); 12928 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); 12929 12930 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); 12931 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); 12932 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); 12933 return true; 12934} 12935 12936// Replace (seteq (i64 (and X, 0xffffffff)), C1) with 12937// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 12938// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 12939// can become a sext.w instead of a shift pair. 12940static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 12941 const RISCVSubtarget &Subtarget) { 12942 SDValue N0 = N->getOperand(0); 12943 SDValue N1 = N->getOperand(1); 12944 EVT VT = N->getValueType(0); 12945 EVT OpVT = N0.getValueType(); 12946 12947 if (OpVT != MVT::i64 || !Subtarget.is64Bit()) 12948 return SDValue(); 12949 12950 // RHS needs to be a constant. 12951 auto *N1C = dyn_cast<ConstantSDNode>(N1); 12952 if (!N1C) 12953 return SDValue(); 12954 12955 // LHS needs to be (and X, 0xffffffff). 12956 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 12957 !isa<ConstantSDNode>(N0.getOperand(1)) || 12958 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) 12959 return SDValue(); 12960 12961 // Looking for an equality compare. 12962 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); 12963 if (!isIntEqualitySetCC(Cond)) 12964 return SDValue(); 12965 12966 // Don't do this if the sign bit is provably zero, it will be turned back into 12967 // an AND. 12968 APInt SignMask = APInt::getOneBitSet(64, 31); 12969 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) 12970 return SDValue(); 12971 12972 const APInt &C1 = N1C->getAPIntValue(); 12973 12974 SDLoc dl(N); 12975 // If the constant is larger than 2^32 - 1 it is impossible for both sides 12976 // to be equal. 
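// For example (illustrative), (seteq (and X, 0xffffffff), 0xffffffff) becomes
// (seteq (sext_inreg X, i32), -1): the constant -1 is cheaper to materialize
// and the sext_inreg can later be selected as sext.w rather than a shift
// pair.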
12977 if (C1.getActiveBits() > 32) 12978 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); 12979 12980 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, 12981 N0.getOperand(0), DAG.getValueType(MVT::i32)); 12982 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), 12983 dl, OpVT), Cond); 12984} 12985 12986static SDValue 12987performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 12988 const RISCVSubtarget &Subtarget) { 12989 SDValue Src = N->getOperand(0); 12990 EVT VT = N->getValueType(0); 12991 12992 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 12993 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && 12994 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) 12995 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, 12996 Src.getOperand(0)); 12997 12998 return SDValue(); 12999} 13000 13001namespace { 13002// Forward declaration of the structure holding the necessary information to 13003// apply a combine. 13004struct CombineResult; 13005 13006/// Helper class for folding sign/zero extensions. 13007/// In particular, this class is used for the following combines: 13008/// add | add_vl -> vwadd(u) | vwadd(u)_w 13009/// sub | sub_vl -> vwsub(u) | vwsub(u)_w 13010/// mul | mul_vl -> vwmul(u) | vwmul_su 13011/// 13012/// An object of this class represents an operand of the operation we want to 13013/// combine. 13014/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 13015/// NodeExtensionHelper for `a` and one for `b`. 13016/// 13017/// This class abstracts away how the extension is materialized and 13018/// how its Mask, VL, number of users affect the combines. 13019/// 13020/// In particular: 13021/// - VWADD_W is conceptually == add(op0, sext(op1)) 13022/// - VWADDU_W == add(op0, zext(op1)) 13023/// - VWSUB_W == sub(op0, sext(op1)) 13024/// - VWSUBU_W == sub(op0, zext(op1)) 13025/// 13026/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 13027/// zext|sext(smaller_value). 13028struct NodeExtensionHelper { 13029 /// Records if this operand is like being zero extended. 13030 bool SupportsZExt; 13031 /// Records if this operand is like being sign extended. 13032 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 13033 /// instance, a splat constant (e.g., 3), would support being both sign and 13034 /// zero extended. 13035 bool SupportsSExt; 13036 /// This boolean captures whether we care if this operand would still be 13037 /// around after the folding happens. 13038 bool EnforceOneUse; 13039 /// Records if this operand's mask needs to match the mask of the operation 13040 /// that it will fold into. 13041 bool CheckMask; 13042 /// Value of the Mask for this operand. 13043 /// It may be SDValue(). 13044 SDValue Mask; 13045 /// Value of the vector length operand. 13046 /// It may be SDValue(). 13047 SDValue VL; 13048 /// Original value that this NodeExtensionHelper represents. 13049 SDValue OrigOperand; 13050 13051 /// Get the value feeding the extension or the value itself. 13052 /// E.g., for zext(a), this would return a. 13053 SDValue getSource() const { 13054 switch (OrigOperand.getOpcode()) { 13055 case ISD::ZERO_EXTEND: 13056 case ISD::SIGN_EXTEND: 13057 case RISCVISD::VSEXT_VL: 13058 case RISCVISD::VZEXT_VL: 13059 return OrigOperand.getOperand(0); 13060 default: 13061 return OrigOperand; 13062 } 13063 } 13064 13065 /// Check if this instance represents a splat. 
13066 bool isSplat() const { 13067 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL; 13068 } 13069 13070 /// Get or create a value that can feed \p Root with the given extension \p 13071 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. 13072 /// \see ::getSource(). 13073 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, 13074 const RISCVSubtarget &Subtarget, 13075 std::optional<bool> SExt) const { 13076 if (!SExt.has_value()) 13077 return OrigOperand; 13078 13079 MVT NarrowVT = getNarrowType(Root); 13080 13081 SDValue Source = getSource(); 13082 if (Source.getValueType() == NarrowVT) 13083 return Source; 13084 13085 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; 13086 13087 // If we need an extension, we should be changing the type. 13088 SDLoc DL(Root); 13089 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); 13090 switch (OrigOperand.getOpcode()) { 13091 case ISD::ZERO_EXTEND: 13092 case ISD::SIGN_EXTEND: 13093 case RISCVISD::VSEXT_VL: 13094 case RISCVISD::VZEXT_VL: 13095 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); 13096 case RISCVISD::VMV_V_X_VL: 13097 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, 13098 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); 13099 default: 13100 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL 13101 // and that operand should already have the right NarrowVT so no 13102 // extension should be required at this point. 13103 llvm_unreachable("Unsupported opcode"); 13104 } 13105 } 13106 13107 /// Helper function to get the narrow type for \p Root. 13108 /// The narrow type is the type of \p Root where we divided the size of each 13109 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. 13110 /// \pre The size of the type of the elements of Root must be a multiple of 2 13111 /// and be greater than 16. 13112 static MVT getNarrowType(const SDNode *Root) { 13113 MVT VT = Root->getSimpleValueType(0); 13114 13115 // Determine the narrow size. 13116 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 13117 assert(NarrowSize >= 8 && "Trying to extend something we can't represent"); 13118 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), 13119 VT.getVectorElementCount()); 13120 return NarrowVT; 13121 } 13122 13123 /// Return the opcode required to materialize the folding of the sign 13124 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for 13125 /// both operands for \p Opcode. 13126 /// Put differently, get the opcode to materialize: 13127 /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b) 13128 /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b) 13129 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). 13130 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { 13131 switch (Opcode) { 13132 case ISD::ADD: 13133 case RISCVISD::ADD_VL: 13134 case RISCVISD::VWADD_W_VL: 13135 case RISCVISD::VWADDU_W_VL: 13136 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; 13137 case ISD::MUL: 13138 case RISCVISD::MUL_VL: 13139 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; 13140 case ISD::SUB: 13141 case RISCVISD::SUB_VL: 13142 case RISCVISD::VWSUB_W_VL: 13143 case RISCVISD::VWSUBU_W_VL: 13144 return IsSExt ? 
RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL; 13145 default: 13146 llvm_unreachable("Unexpected opcode"); 13147 } 13148 } 13149 13150 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> 13151 /// newOpcode(a, b). 13152 static unsigned getSUOpcode(unsigned Opcode) { 13153 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && 13154 "SU is only supported for MUL"); 13155 return RISCVISD::VWMULSU_VL; 13156 } 13157 13158 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) -> 13159 /// newOpcode(a, b). 13160 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { 13161 switch (Opcode) { 13162 case ISD::ADD: 13163 case RISCVISD::ADD_VL: 13164 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; 13165 case ISD::SUB: 13166 case RISCVISD::SUB_VL: 13167 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; 13168 default: 13169 llvm_unreachable("Unexpected opcode"); 13170 } 13171 } 13172 13173 using CombineToTry = std::function<std::optional<CombineResult>( 13174 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, 13175 const NodeExtensionHelper & /*RHS*/, SelectionDAG &, 13176 const RISCVSubtarget &)>; 13177 13178 /// Check if this node needs to be fully folded or extended for all users. 13179 bool needToPromoteOtherUsers() const { return EnforceOneUse; } 13180 13181 /// Helper method to set the various fields of this struct based on the 13182 /// type of \p Root. 13183 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, 13184 const RISCVSubtarget &Subtarget) { 13185 SupportsZExt = false; 13186 SupportsSExt = false; 13187 EnforceOneUse = true; 13188 CheckMask = true; 13189 unsigned Opc = OrigOperand.getOpcode(); 13190 switch (Opc) { 13191 case ISD::ZERO_EXTEND: 13192 case ISD::SIGN_EXTEND: { 13193 MVT VT = OrigOperand.getSimpleValueType(); 13194 if (!VT.isVector()) 13195 break; 13196 13197 SDValue NarrowElt = OrigOperand.getOperand(0); 13198 MVT NarrowVT = NarrowElt.getSimpleValueType(); 13199 13200 unsigned ScalarBits = VT.getScalarSizeInBits(); 13201 unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits(); 13202 13203 // Ensure the narrowing element type is legal 13204 if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType())) 13205 break; 13206 13207 // Ensure the extension's semantic is equivalent to rvv vzext or vsext. 13208 if (ScalarBits != NarrowScalarBits * 2) 13209 break; 13210 13211 SupportsZExt = Opc == ISD::ZERO_EXTEND; 13212 SupportsSExt = Opc == ISD::SIGN_EXTEND; 13213 13214 SDLoc DL(Root); 13215 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 13216 break; 13217 } 13218 case RISCVISD::VZEXT_VL: 13219 SupportsZExt = true; 13220 Mask = OrigOperand.getOperand(1); 13221 VL = OrigOperand.getOperand(2); 13222 break; 13223 case RISCVISD::VSEXT_VL: 13224 SupportsSExt = true; 13225 Mask = OrigOperand.getOperand(1); 13226 VL = OrigOperand.getOperand(2); 13227 break; 13228 case RISCVISD::VMV_V_X_VL: { 13229 // Historically, we didn't care about splat values not disappearing during 13230 // combines. 13231 EnforceOneUse = false; 13232 CheckMask = false; 13233 VL = OrigOperand.getOperand(2); 13234 13235 // The operand is a splat of a scalar. 13236 13237 // The pasthru must be undef for tail agnostic. 13238 if (!OrigOperand.getOperand(0).isUndef()) 13239 break; 13240 13241 // Get the scalar value. 13242 SDValue Op = OrigOperand.getOperand(1); 13243 13244 // See if we have enough sign bits or zero bits in the scalar to use a 13245 // widening opcode by splatting to smaller element size. 
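      // For example (values made up): splatting 3 into nxv4i32 only needs a
      // couple of significant bits, so the same value can be splatted as
      // nxv4i16 and then consumed by a vwadd/vwaddu or vwmul/vwmulu style op.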
13246 MVT VT = Root->getSimpleValueType(0); 13247 unsigned EltBits = VT.getScalarSizeInBits(); 13248 unsigned ScalarBits = Op.getValueSizeInBits(); 13249 // Make sure we're getting all element bits from the scalar register. 13250 // FIXME: Support implicit sign extension of vmv.v.x? 13251 if (ScalarBits < EltBits) 13252 break; 13253 13254 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 13255 // If the narrow type cannot be expressed with a legal VMV, 13256 // this is not a valid candidate. 13257 if (NarrowSize < 8) 13258 break; 13259 13260 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) 13261 SupportsSExt = true; 13262 if (DAG.MaskedValueIsZero(Op, 13263 APInt::getBitsSetFrom(ScalarBits, NarrowSize))) 13264 SupportsZExt = true; 13265 break; 13266 } 13267 default: 13268 break; 13269 } 13270 } 13271 13272 /// Check if \p Root supports any extension folding combines. 13273 static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { 13274 switch (Root->getOpcode()) { 13275 case ISD::ADD: 13276 case ISD::SUB: 13277 case ISD::MUL: { 13278 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13279 if (!TLI.isTypeLegal(Root->getValueType(0))) 13280 return false; 13281 return Root->getValueType(0).isScalableVector(); 13282 } 13283 case RISCVISD::ADD_VL: 13284 case RISCVISD::MUL_VL: 13285 case RISCVISD::VWADD_W_VL: 13286 case RISCVISD::VWADDU_W_VL: 13287 case RISCVISD::SUB_VL: 13288 case RISCVISD::VWSUB_W_VL: 13289 case RISCVISD::VWSUBU_W_VL: 13290 return true; 13291 default: 13292 return false; 13293 } 13294 } 13295 13296 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 13297 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, 13298 const RISCVSubtarget &Subtarget) { 13299 assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " 13300 "unsupported root"); 13301 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); 13302 OrigOperand = Root->getOperand(OperandIdx); 13303 13304 unsigned Opc = Root->getOpcode(); 13305 switch (Opc) { 13306 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were 13307 // <ADD|SUB>(LHS, S|ZEXT(RHS)) 13308 case RISCVISD::VWADD_W_VL: 13309 case RISCVISD::VWADDU_W_VL: 13310 case RISCVISD::VWSUB_W_VL: 13311 case RISCVISD::VWSUBU_W_VL: 13312 if (OperandIdx == 1) { 13313 SupportsZExt = 13314 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; 13315 SupportsSExt = !SupportsZExt; 13316 std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); 13317 CheckMask = true; 13318 // There's no existing extension here, so we don't have to worry about 13319 // making sure it gets removed. 13320 EnforceOneUse = false; 13321 break; 13322 } 13323 [[fallthrough]]; 13324 default: 13325 fillUpExtensionSupport(Root, DAG, Subtarget); 13326 break; 13327 } 13328 } 13329 13330 /// Check if this operand is compatible with the given vector length \p VL. 13331 bool isVLCompatible(SDValue VL) const { 13332 return this->VL != SDValue() && this->VL == VL; 13333 } 13334 13335 /// Check if this operand is compatible with the given \p Mask. 13336 bool isMaskCompatible(SDValue Mask) const { 13337 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask); 13338 } 13339 13340 /// Helper function to get the Mask and VL from \p Root. 
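  /// For the *_VL roots these are simply operands 3 and 4; for plain
  /// ISD::ADD/SUB/MUL roots the default scalable mask and VL for the root's
  /// type are used instead (via getDefaultScalableVLOps).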
13341 static std::pair<SDValue, SDValue> 13342 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, 13343 const RISCVSubtarget &Subtarget) { 13344 assert(isSupportedRoot(Root, DAG) && "Unexpected root"); 13345 switch (Root->getOpcode()) { 13346 case ISD::ADD: 13347 case ISD::SUB: 13348 case ISD::MUL: { 13349 SDLoc DL(Root); 13350 MVT VT = Root->getSimpleValueType(0); 13351 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 13352 } 13353 default: 13354 return std::make_pair(Root->getOperand(3), Root->getOperand(4)); 13355 } 13356 } 13357 13358 /// Check if the Mask and VL of this operand are compatible with \p Root. 13359 bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, 13360 const RISCVSubtarget &Subtarget) const { 13361 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); 13362 return isMaskCompatible(Mask) && isVLCompatible(VL); 13363 } 13364 13365 /// Helper function to check if \p N is commutative with respect to the 13366 /// foldings that are supported by this class. 13367 static bool isCommutative(const SDNode *N) { 13368 switch (N->getOpcode()) { 13369 case ISD::ADD: 13370 case ISD::MUL: 13371 case RISCVISD::ADD_VL: 13372 case RISCVISD::MUL_VL: 13373 case RISCVISD::VWADD_W_VL: 13374 case RISCVISD::VWADDU_W_VL: 13375 return true; 13376 case ISD::SUB: 13377 case RISCVISD::SUB_VL: 13378 case RISCVISD::VWSUB_W_VL: 13379 case RISCVISD::VWSUBU_W_VL: 13380 return false; 13381 default: 13382 llvm_unreachable("Unexpected opcode"); 13383 } 13384 } 13385 13386 /// Get a list of combine to try for folding extensions in \p Root. 13387 /// Note that each returned CombineToTry function doesn't actually modify 13388 /// anything. Instead they produce an optional CombineResult that if not None, 13389 /// need to be materialized for the combine to be applied. 13390 /// \see CombineResult::materialize. 13391 /// If the related CombineToTry function returns std::nullopt, that means the 13392 /// combine didn't match. 13393 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); 13394}; 13395 13396/// Helper structure that holds all the necessary information to materialize a 13397/// combine that does some extension folding. 13398struct CombineResult { 13399 /// Opcode to be generated when materializing the combine. 13400 unsigned TargetOpcode; 13401 // No value means no extension is needed. If extension is needed, the value 13402 // indicates if it needs to be sign extended. 13403 std::optional<bool> SExtLHS; 13404 std::optional<bool> SExtRHS; 13405 /// Root of the combine. 13406 SDNode *Root; 13407 /// LHS of the TargetOpcode. 13408 NodeExtensionHelper LHS; 13409 /// RHS of the TargetOpcode. 13410 NodeExtensionHelper RHS; 13411 13412 CombineResult(unsigned TargetOpcode, SDNode *Root, 13413 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS, 13414 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS) 13415 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS), 13416 Root(Root), LHS(LHS), RHS(RHS) {} 13417 13418 /// Return a value that uses TargetOpcode and that can be used to replace 13419 /// Root. 13420 /// The actual replacement is *not* done in that method. 
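  /// For instance, a CombineResult with TargetOpcode == RISCVISD::VWADD_VL
  /// for the root (add_vl (sext a), (sext b), passthru, mask, vl) materializes
  /// (vwadd_vl a, b, passthru, mask, vl).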
13421 SDValue materialize(SelectionDAG &DAG, 13422 const RISCVSubtarget &Subtarget) const { 13423 SDValue Mask, VL, Merge; 13424 std::tie(Mask, VL) = 13425 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); 13426 switch (Root->getOpcode()) { 13427 default: 13428 Merge = Root->getOperand(2); 13429 break; 13430 case ISD::ADD: 13431 case ISD::SUB: 13432 case ISD::MUL: 13433 Merge = DAG.getUNDEF(Root->getValueType(0)); 13434 break; 13435 } 13436 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), 13437 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), 13438 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), 13439 Merge, Mask, VL); 13440 } 13441}; 13442 13443/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 13444/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 13445/// are zext) and LHS and RHS can be folded into Root. 13446/// AllowSExt and AllozZExt define which form `ext` can take in this pattern. 13447/// 13448/// \note If the pattern can match with both zext and sext, the returned 13449/// CombineResult will feature the zext result. 13450/// 13451/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13452/// can be used to apply the pattern. 13453static std::optional<CombineResult> 13454canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, 13455 const NodeExtensionHelper &RHS, bool AllowSExt, 13456 bool AllowZExt, SelectionDAG &DAG, 13457 const RISCVSubtarget &Subtarget) { 13458 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); 13459 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || 13460 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) 13461 return std::nullopt; 13462 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) 13463 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 13464 Root->getOpcode(), /*IsSExt=*/false), 13465 Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); 13466 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) 13467 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( 13468 Root->getOpcode(), /*IsSExt=*/true), 13469 Root, LHS, /*SExtLHS=*/true, RHS, 13470 /*SExtRHS=*/true); 13471 return std::nullopt; 13472} 13473 13474/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 13475/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 13476/// are zext) and LHS and RHS can be folded into Root. 13477/// 13478/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13479/// can be used to apply the pattern. 13480static std::optional<CombineResult> 13481canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, 13482 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13483 const RISCVSubtarget &Subtarget) { 13484 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 13485 /*AllowZExt=*/true, DAG, Subtarget); 13486} 13487 13488/// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 13489/// 13490/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13491/// can be used to apply the pattern. 
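/// E.g., (add X, (zext Y)) can become (vwaddu_w X, Y) and (sub X, (sext Y))
/// can become (vwsub_w X, Y), provided Y's extension has a compatible mask
/// and VL.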
13492static std::optional<CombineResult> 13493canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, 13494 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13495 const RISCVSubtarget &Subtarget) { 13496 if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) 13497 return std::nullopt; 13498 13499 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 13500 // sext/zext? 13501 // Control this behavior behind an option (AllowSplatInVW_W) for testing 13502 // purposes. 13503 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) 13504 return CombineResult( 13505 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), 13506 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); 13507 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) 13508 return CombineResult( 13509 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), 13510 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); 13511 return std::nullopt; 13512} 13513 13514/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 13515/// 13516/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13517/// can be used to apply the pattern. 13518static std::optional<CombineResult> 13519canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, 13520 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13521 const RISCVSubtarget &Subtarget) { 13522 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 13523 /*AllowZExt=*/false, DAG, Subtarget); 13524} 13525 13526/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 13527/// 13528/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13529/// can be used to apply the pattern. 13530static std::optional<CombineResult> 13531canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, 13532 const NodeExtensionHelper &RHS, SelectionDAG &DAG, 13533 const RISCVSubtarget &Subtarget) { 13534 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, 13535 /*AllowZExt=*/true, DAG, Subtarget); 13536} 13537 13538/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 13539/// 13540/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 13541/// can be used to apply the pattern. 
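/// E.g., (mul (sext a), (zext b)) can become (vwmulsu a, b).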
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
               const NodeExtensionHelper &RHS, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {

  if (!LHS.SupportsSExt || !RHS.SupportsZExt)
    return std::nullopt;
  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
    return std::nullopt;
  return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
                       Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
}

SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
    // add|sub -> vwadd(u)|vwsub(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub -> vwadd(u)_w|vwsub(u)_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
  case RISCVISD::VWADDU_W_VL:
  case RISCVISD::VWSUBU_W_VL:
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
} // End anonymous namespace.

/// Combine a binary operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd(u)_w -> vwadd(u)
/// vwsub(u)_w -> vwsub(u)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
    return SDValue();

  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();
    if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
      return SDValue();

    NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
    auto AppendUsersIfNeeded = [&Worklist,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode *TheUse : Op.OrigOperand->uses()) {
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
    };

    // Control the compile time by limiting the number of nodes we look at in
    // total.
13633 if (Inserted.size() > ExtensionMaxWebSize) 13634 return SDValue(); 13635 13636 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = 13637 NodeExtensionHelper::getSupportedFoldings(N); 13638 13639 assert(!FoldingStrategies.empty() && "Nothing to be folded"); 13640 bool Matched = false; 13641 for (int Attempt = 0; 13642 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched; 13643 ++Attempt) { 13644 13645 for (NodeExtensionHelper::CombineToTry FoldingStrategy : 13646 FoldingStrategies) { 13647 std::optional<CombineResult> Res = 13648 FoldingStrategy(N, LHS, RHS, DAG, Subtarget); 13649 if (Res) { 13650 Matched = true; 13651 CombinesToApply.push_back(*Res); 13652 // All the inputs that are extended need to be folded, otherwise 13653 // we would be leaving the old input (since it is may still be used), 13654 // and the new one. 13655 if (Res->SExtLHS.has_value()) 13656 AppendUsersIfNeeded(LHS); 13657 if (Res->SExtRHS.has_value()) 13658 AppendUsersIfNeeded(RHS); 13659 break; 13660 } 13661 } 13662 std::swap(LHS, RHS); 13663 } 13664 // Right now we do an all or nothing approach. 13665 if (!Matched) 13666 return SDValue(); 13667 } 13668 // Store the value for the replacement of the input node separately. 13669 SDValue InputRootReplacement; 13670 // We do the RAUW after we materialize all the combines, because some replaced 13671 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, 13672 // some of these nodes may appear in the NodeExtensionHelpers of some of the 13673 // yet-to-be-visited CombinesToApply roots. 13674 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; 13675 ValuesToReplace.reserve(CombinesToApply.size()); 13676 for (CombineResult Res : CombinesToApply) { 13677 SDValue NewValue = Res.materialize(DAG, Subtarget); 13678 if (!InputRootReplacement) { 13679 assert(Res.Root == N && 13680 "First element is expected to be the current node"); 13681 InputRootReplacement = NewValue; 13682 } else { 13683 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); 13684 } 13685 } 13686 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { 13687 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); 13688 DCI.AddToWorklist(OldNewValues.second.getNode()); 13689 } 13690 return InputRootReplacement; 13691} 13692 13693// Helper function for performMemPairCombine. 13694// Try to combine the memory loads/stores LSNode1 and LSNode2 13695// into a single memory pair operation. 13696static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, 13697 LSBaseSDNode *LSNode2, SDValue BasePtr, 13698 uint64_t Imm) { 13699 SmallPtrSet<const SDNode *, 32> Visited; 13700 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; 13701 13702 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) || 13703 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist)) 13704 return SDValue(); 13705 13706 MachineFunction &MF = DAG.getMachineFunction(); 13707 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 13708 13709 // The new operation has twice the width. 13710 MVT XLenVT = Subtarget.getXLenVT(); 13711 EVT MemVT = LSNode1->getMemoryVT(); 13712 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; 13713 MachineMemOperand *MMO = LSNode1->getMemOperand(); 13714 MachineMemOperand *NewMMO = MF.getMachineMemOperand( 13715 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 
8 : 16); 13716 13717 if (LSNode1->getOpcode() == ISD::LOAD) { 13718 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType(); 13719 unsigned Opcode; 13720 if (MemVT == MVT::i32) 13721 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; 13722 else 13723 Opcode = RISCVISD::TH_LDD; 13724 13725 SDValue Res = DAG.getMemIntrinsicNode( 13726 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), 13727 {LSNode1->getChain(), BasePtr, 13728 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 13729 NewMemVT, NewMMO); 13730 13731 SDValue Node1 = 13732 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1)); 13733 SDValue Node2 = 13734 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2)); 13735 13736 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode()); 13737 return Node1; 13738 } else { 13739 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; 13740 13741 SDValue Res = DAG.getMemIntrinsicNode( 13742 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), 13743 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), 13744 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, 13745 NewMemVT, NewMMO); 13746 13747 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode()); 13748 return Res; 13749 } 13750} 13751 13752// Try to combine two adjacent loads/stores to a single pair instruction from 13753// the XTHeadMemPair vendor extension. 13754static SDValue performMemPairCombine(SDNode *N, 13755 TargetLowering::DAGCombinerInfo &DCI) { 13756 SelectionDAG &DAG = DCI.DAG; 13757 MachineFunction &MF = DAG.getMachineFunction(); 13758 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); 13759 13760 // Target does not support load/store pair. 13761 if (!Subtarget.hasVendorXTHeadMemPair()) 13762 return SDValue(); 13763 13764 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N); 13765 EVT MemVT = LSNode1->getMemoryVT(); 13766 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; 13767 13768 // No volatile, indexed or atomic loads/stores. 13769 if (!LSNode1->isSimple() || LSNode1->isIndexed()) 13770 return SDValue(); 13771 13772 // Function to get a base + constant representation from a memory value. 13773 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { 13774 if (Ptr->getOpcode() == ISD::ADD) 13775 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) 13776 return {Ptr->getOperand(0), C1->getZExtValue()}; 13777 return {Ptr, 0}; 13778 }; 13779 13780 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum)); 13781 13782 SDValue Chain = N->getOperand(0); 13783 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); 13784 UI != UE; ++UI) { 13785 SDUse &Use = UI.getUse(); 13786 if (Use.getUser() != N && Use.getResNo() == 0 && 13787 Use.getUser()->getOpcode() == N->getOpcode()) { 13788 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser()); 13789 13790 // No volatile, indexed or atomic loads/stores. 13791 if (!LSNode2->isSimple() || LSNode2->isIndexed()) 13792 continue; 13793 13794 // Check if LSNode1 and LSNode2 have the same type and extension. 13795 if (LSNode1->getOpcode() == ISD::LOAD) 13796 if (cast<LoadSDNode>(LSNode2)->getExtensionType() != 13797 cast<LoadSDNode>(LSNode1)->getExtensionType()) 13798 continue; 13799 13800 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) 13801 continue; 13802 13803 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum)); 13804 13805 // Check if the base pointer is the same for both instruction. 
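      // e.g., two i64 loads from [base] and [base + 8], or two i32 loads from
      // [base] and [base + 4], are candidates, subject to the offset encoding
      // checks below.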
13806 if (Base1 != Base2) 13807 continue; 13808 13809 // Check if the offsets match the XTHeadMemPair encoding contraints. 13810 bool Valid = false; 13811 if (MemVT == MVT::i32) { 13812 // Check for adjacent i32 values and a 2-bit index. 13813 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1)) 13814 Valid = true; 13815 } else if (MemVT == MVT::i64) { 13816 // Check for adjacent i64 values and a 2-bit index. 13817 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1)) 13818 Valid = true; 13819 } 13820 13821 if (!Valid) 13822 continue; 13823 13824 // Try to combine. 13825 if (SDValue Res = 13826 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1)) 13827 return Res; 13828 } 13829 } 13830 13831 return SDValue(); 13832} 13833 13834// Fold 13835// (fp_to_int (froundeven X)) -> fcvt X, rne 13836// (fp_to_int (ftrunc X)) -> fcvt X, rtz 13837// (fp_to_int (ffloor X)) -> fcvt X, rdn 13838// (fp_to_int (fceil X)) -> fcvt X, rup 13839// (fp_to_int (fround X)) -> fcvt X, rmm 13840// (fp_to_int (frint X)) -> fcvt X 13841static SDValue performFP_TO_INTCombine(SDNode *N, 13842 TargetLowering::DAGCombinerInfo &DCI, 13843 const RISCVSubtarget &Subtarget) { 13844 SelectionDAG &DAG = DCI.DAG; 13845 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13846 MVT XLenVT = Subtarget.getXLenVT(); 13847 13848 SDValue Src = N->getOperand(0); 13849 13850 // Don't do this for strict-fp Src. 13851 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 13852 return SDValue(); 13853 13854 // Ensure the FP type is legal. 13855 if (!TLI.isTypeLegal(Src.getValueType())) 13856 return SDValue(); 13857 13858 // Don't do this for f16 with Zfhmin and not Zfh. 13859 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 13860 return SDValue(); 13861 13862 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 13863 // If the result is invalid, we didn't find a foldable instruction. 13864 if (FRM == RISCVFPRndMode::Invalid) 13865 return SDValue(); 13866 13867 SDLoc DL(N); 13868 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; 13869 EVT VT = N->getValueType(0); 13870 13871 if (VT.isVector() && TLI.isTypeLegal(VT)) { 13872 MVT SrcVT = Src.getSimpleValueType(); 13873 MVT SrcContainerVT = SrcVT; 13874 MVT ContainerVT = VT.getSimpleVT(); 13875 SDValue XVal = Src.getOperand(0); 13876 13877 // For widening and narrowing conversions we just combine it into a 13878 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They 13879 // end up getting lowered to their appropriate pseudo instructions based on 13880 // their operand types 13881 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || 13882 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) 13883 return SDValue(); 13884 13885 // Make fixed-length vectors scalable first 13886 if (SrcVT.isFixedLengthVector()) { 13887 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 13888 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); 13889 ContainerVT = 13890 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 13891 } 13892 13893 auto [Mask, VL] = 13894 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); 13895 13896 SDValue FpToInt; 13897 if (FRM == RISCVFPRndMode::RTZ) { 13898 // Use the dedicated trunc static rounding mode if we're truncating so we 13899 // don't need to generate calls to fsrmi/fsrm 13900 unsigned Opc = 13901 IsSigned ? 
RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 13902 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 13903 } else if (FRM == RISCVFPRndMode::DYN) { 13904 unsigned Opc = 13905 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; 13906 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 13907 } else { 13908 unsigned Opc = 13909 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; 13910 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, 13911 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 13912 } 13913 13914 // If converted from fixed-length to scalable, convert back 13915 if (VT.isFixedLengthVector()) 13916 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); 13917 13918 return FpToInt; 13919 } 13920 13921 // Only handle XLen or i32 types. Other types narrower than XLen will 13922 // eventually be legalized to XLenVT. 13923 if (VT != MVT::i32 && VT != XLenVT) 13924 return SDValue(); 13925 13926 unsigned Opc; 13927 if (VT == XLenVT) 13928 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 13929 else 13930 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 13931 13932 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), 13933 DAG.getTargetConstant(FRM, DL, XLenVT)); 13934 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); 13935} 13936 13937// Fold 13938// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 13939// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 13940// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 13941// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 13942// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 13943// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) 13944static SDValue performFP_TO_INT_SATCombine(SDNode *N, 13945 TargetLowering::DAGCombinerInfo &DCI, 13946 const RISCVSubtarget &Subtarget) { 13947 SelectionDAG &DAG = DCI.DAG; 13948 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 13949 MVT XLenVT = Subtarget.getXLenVT(); 13950 13951 // Only handle XLen types. Other types narrower than XLen will eventually be 13952 // legalized to XLenVT. 13953 EVT DstVT = N->getValueType(0); 13954 if (DstVT != XLenVT) 13955 return SDValue(); 13956 13957 SDValue Src = N->getOperand(0); 13958 13959 // Don't do this for strict-fp Src. 13960 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) 13961 return SDValue(); 13962 13963 // Ensure the FP type is also legal. 13964 if (!TLI.isTypeLegal(Src.getValueType())) 13965 return SDValue(); 13966 13967 // Don't do this for f16 with Zfhmin and not Zfh. 13968 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 13969 return SDValue(); 13970 13971 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 13972 13973 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 13974 if (FRM == RISCVFPRndMode::Invalid) 13975 return SDValue(); 13976 13977 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; 13978 13979 unsigned Opc; 13980 if (SatVT == DstVT) 13981 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 13982 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 13983 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 13984 else 13985 return SDValue(); 13986 // FIXME: Support other SatVTs by clamping before or after the conversion. 
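  // For illustration: on RV64, (fp_to_sint_sat (ffloor f64:X), i32) becomes an
  // FCVT_W_RV64 with the rdn static rounding mode, and the select emitted
  // below returns 0 when X is NaN.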
13987 13988 Src = Src.getOperand(0); 13989 13990 SDLoc DL(N); 13991 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, 13992 DAG.getTargetConstant(FRM, DL, XLenVT)); 13993 13994 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 13995 // extend. 13996 if (Opc == RISCVISD::FCVT_WU_RV64) 13997 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 13998 13999 // RISC-V FP-to-int conversions saturate to the destination register size, but 14000 // don't produce 0 for nan. 14001 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 14002 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 14003} 14004 14005// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 14006// smaller than XLenVT. 14007static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, 14008 const RISCVSubtarget &Subtarget) { 14009 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 14010 14011 SDValue Src = N->getOperand(0); 14012 if (Src.getOpcode() != ISD::BSWAP) 14013 return SDValue(); 14014 14015 EVT VT = N->getValueType(0); 14016 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || 14017 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) 14018 return SDValue(); 14019 14020 SDLoc DL(N); 14021 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); 14022} 14023 14024// Convert from one FMA opcode to another based on whether we are negating the 14025// multiply result and/or the accumulator. 14026// NOTE: Only supports RVV operations with VL. 14027static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { 14028 // Negating the multiply result changes ADD<->SUB and toggles 'N'. 14029 if (NegMul) { 14030 // clang-format off 14031 switch (Opcode) { 14032 default: llvm_unreachable("Unexpected opcode"); 14033 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 14034 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 14035 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 14036 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 14037 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 14038 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 14039 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 14040 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 14041 } 14042 // clang-format on 14043 } 14044 14045 // Negating the accumulator changes ADD<->SUB. 14046 if (NegAcc) { 14047 // clang-format off 14048 switch (Opcode) { 14049 default: llvm_unreachable("Unexpected opcode"); 14050 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 14051 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 14052 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 14053 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 14054 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; 14055 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; 14056 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; 14057 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; 14058 } 14059 // clang-format on 14060 } 14061 14062 return Opcode; 14063} 14064 14065static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { 14066 // Fold FNEG_VL into FMA opcodes. 14067 // The first operand of strict-fp is chain. 
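  // e.g., (vfmadd_vl (fneg_vl a, mask, vl), b, c, mask, vl) becomes
  // (vfnmsub_vl a, b, c, mask, vl); if c is negated as well, the result is
  // vfnmadd_vl.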
14068 unsigned Offset = N->isTargetStrictFPOpcode(); 14069 SDValue A = N->getOperand(0 + Offset); 14070 SDValue B = N->getOperand(1 + Offset); 14071 SDValue C = N->getOperand(2 + Offset); 14072 SDValue Mask = N->getOperand(3 + Offset); 14073 SDValue VL = N->getOperand(4 + Offset); 14074 14075 auto invertIfNegative = [&Mask, &VL](SDValue &V) { 14076 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && 14077 V.getOperand(2) == VL) { 14078 // Return the negated input. 14079 V = V.getOperand(0); 14080 return true; 14081 } 14082 14083 return false; 14084 }; 14085 14086 bool NegA = invertIfNegative(A); 14087 bool NegB = invertIfNegative(B); 14088 bool NegC = invertIfNegative(C); 14089 14090 // If no operands are negated, we're done. 14091 if (!NegA && !NegB && !NegC) 14092 return SDValue(); 14093 14094 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); 14095 if (N->isTargetStrictFPOpcode()) 14096 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(), 14097 {N->getOperand(0), A, B, C, Mask, VL}); 14098 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, 14099 VL); 14100} 14101 14102static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, 14103 const RISCVSubtarget &Subtarget) { 14104 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) 14105 return V; 14106 14107 if (N->getValueType(0).isScalableVector() && 14108 N->getValueType(0).getVectorElementType() == MVT::f32 && 14109 (Subtarget.hasVInstructionsF16Minimal() && 14110 !Subtarget.hasVInstructionsF16())) { 14111 return SDValue(); 14112 } 14113 14114 // FIXME: Ignore strict opcodes for now. 14115 if (N->isTargetStrictFPOpcode()) 14116 return SDValue(); 14117 14118 // Try to form widening FMA. 14119 SDValue Op0 = N->getOperand(0); 14120 SDValue Op1 = N->getOperand(1); 14121 SDValue Mask = N->getOperand(3); 14122 SDValue VL = N->getOperand(4); 14123 14124 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 14125 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 14126 return SDValue(); 14127 14128 // TODO: Refactor to handle more complex cases similar to 14129 // combineBinOp_VLToVWBinOp_VL. 14130 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 14131 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 14132 return SDValue(); 14133 14134 // Check the mask and VL are the same. 14135 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 14136 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 14137 return SDValue(); 14138 14139 unsigned NewOpc; 14140 switch (N->getOpcode()) { 14141 default: 14142 llvm_unreachable("Unexpected opcode"); 14143 case RISCVISD::VFMADD_VL: 14144 NewOpc = RISCVISD::VFWMADD_VL; 14145 break; 14146 case RISCVISD::VFNMSUB_VL: 14147 NewOpc = RISCVISD::VFWNMSUB_VL; 14148 break; 14149 case RISCVISD::VFNMADD_VL: 14150 NewOpc = RISCVISD::VFWNMADD_VL; 14151 break; 14152 case RISCVISD::VFMSUB_VL: 14153 NewOpc = RISCVISD::VFWMSUB_VL; 14154 break; 14155 } 14156 14157 Op0 = Op0.getOperand(0); 14158 Op1 = Op1.getOperand(0); 14159 14160 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1, 14161 N->getOperand(2), Mask, VL); 14162} 14163 14164static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG, 14165 const RISCVSubtarget &Subtarget) { 14166 if (N->getValueType(0).isScalableVector() && 14167 N->getValueType(0).getVectorElementType() == MVT::f32 && 14168 (Subtarget.hasVInstructionsF16Minimal() && 14169 !Subtarget.hasVInstructionsF16())) { 14170 return SDValue(); 14171 } 14172 14173 // FIXME: Ignore strict opcodes for now. 
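  // e.g., (fmul_vl (fpext_vl a), (fpext_vl b), merge, mask, vl) can become
  // (vfwmul_vl a, b, merge, mask, vl) when both extends use the root's mask
  // and VL.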
14174 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode"); 14175 14176 // Try to form widening multiply. 14177 SDValue Op0 = N->getOperand(0); 14178 SDValue Op1 = N->getOperand(1); 14179 SDValue Merge = N->getOperand(2); 14180 SDValue Mask = N->getOperand(3); 14181 SDValue VL = N->getOperand(4); 14182 14183 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || 14184 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) 14185 return SDValue(); 14186 14187 // TODO: Refactor to handle more complex cases similar to 14188 // combineBinOp_VLToVWBinOp_VL. 14189 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && 14190 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) 14191 return SDValue(); 14192 14193 // Check the mask and VL are the same. 14194 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || 14195 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 14196 return SDValue(); 14197 14198 Op0 = Op0.getOperand(0); 14199 Op1 = Op1.getOperand(0); 14200 14201 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0, 14202 Op1, Merge, Mask, VL); 14203} 14204 14205static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG, 14206 const RISCVSubtarget &Subtarget) { 14207 if (N->getValueType(0).isScalableVector() && 14208 N->getValueType(0).getVectorElementType() == MVT::f32 && 14209 (Subtarget.hasVInstructionsF16Minimal() && 14210 !Subtarget.hasVInstructionsF16())) { 14211 return SDValue(); 14212 } 14213 14214 SDValue Op0 = N->getOperand(0); 14215 SDValue Op1 = N->getOperand(1); 14216 SDValue Merge = N->getOperand(2); 14217 SDValue Mask = N->getOperand(3); 14218 SDValue VL = N->getOperand(4); 14219 14220 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL; 14221 14222 // Look for foldable FP_EXTENDS. 14223 bool Op0IsExtend = 14224 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL && 14225 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0))); 14226 bool Op1IsExtend = 14227 (Op0 == Op1 && Op0IsExtend) || 14228 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse()); 14229 14230 // Check the mask and VL. 14231 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)) 14232 Op0IsExtend = false; 14233 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)) 14234 Op1IsExtend = false; 14235 14236 // Canonicalize. 14237 if (!Op1IsExtend) { 14238 // Sub requires at least operand 1 to be an extend. 14239 if (!IsAdd) 14240 return SDValue(); 14241 14242 // Add is commutable, if the other operand is foldable, swap them. 14243 if (!Op0IsExtend) 14244 return SDValue(); 14245 14246 std::swap(Op0, Op1); 14247 std::swap(Op0IsExtend, Op1IsExtend); 14248 } 14249 14250 // Op1 is a foldable extend. Op0 might be foldable. 14251 Op1 = Op1.getOperand(0); 14252 if (Op0IsExtend) 14253 Op0 = Op0.getOperand(0); 14254 14255 unsigned Opc; 14256 if (IsAdd) 14257 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL; 14258 else 14259 Opc = Op0IsExtend ? 
RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL; 14260 14261 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask, 14262 VL); 14263} 14264 14265static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 14266 const RISCVSubtarget &Subtarget) { 14267 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); 14268 14269 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) 14270 return SDValue(); 14271 14272 if (!isa<ConstantSDNode>(N->getOperand(1))) 14273 return SDValue(); 14274 uint64_t ShAmt = N->getConstantOperandVal(1); 14275 if (ShAmt > 32) 14276 return SDValue(); 14277 14278 SDValue N0 = N->getOperand(0); 14279 14280 // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> 14281 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of 14282 // SLLIW+SRAIW. SLLI+SRAI have compressed forms. 14283 if (ShAmt < 32 && 14284 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && 14285 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && 14286 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && 14287 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 14288 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); 14289 if (LShAmt < 32) { 14290 SDLoc ShlDL(N0.getOperand(0)); 14291 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, 14292 N0.getOperand(0).getOperand(0), 14293 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); 14294 SDLoc DL(N); 14295 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, 14296 DAG.getConstant(ShAmt + 32, DL, MVT::i64)); 14297 } 14298 } 14299 14300 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 14301 // FIXME: Should this be a generic combine? There's a similar combine on X86. 14302 // 14303 // Also try these folds where an add or sub is in the middle. 14304 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) 14305 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) 14306 SDValue Shl; 14307 ConstantSDNode *AddC = nullptr; 14308 14309 // We might have an ADD or SUB between the SRA and SHL. 14310 bool IsAdd = N0.getOpcode() == ISD::ADD; 14311 if ((IsAdd || N0.getOpcode() == ISD::SUB)) { 14312 // Other operand needs to be a constant we can modify. 14313 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0)); 14314 if (!AddC) 14315 return SDValue(); 14316 14317 // AddC needs to have at least 32 trailing zeros. 14318 if (AddC->getAPIntValue().countr_zero() < 32) 14319 return SDValue(); 14320 14321 // All users should be a shift by constant less than or equal to 32. This 14322 // ensures we'll do this optimization for each of them to produce an 14323 // add/sub+sext_inreg they can all share. 14324 for (SDNode *U : N0->uses()) { 14325 if (U->getOpcode() != ISD::SRA || 14326 !isa<ConstantSDNode>(U->getOperand(1)) || 14327 U->getConstantOperandVal(1) > 32) 14328 return SDValue(); 14329 } 14330 14331 Shl = N0.getOperand(IsAdd ? 0 : 1); 14332 } else { 14333 // Not an ADD or SUB. 14334 Shl = N0; 14335 } 14336 14337 // Look for a shift left by 32. 14338 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) || 14339 Shl.getConstantOperandVal(1) != 32) 14340 return SDValue(); 14341 14342 // We if we didn't look through an add/sub, then the shl should have one use. 14343 // If we did look through an add/sub, the sext_inreg we create is free so 14344 // we're only creating 2 new instructions. It's enough to only remove the 14345 // original sra+add/sub. 
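  // As a concrete example (shift amounts made up): (sra (shl X, 32), 27)
  // becomes (shl (sext_inreg X, i32), 5); with an add in the middle, the
  // rebuilt add uses the original constant shifted right by 32.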
14346 if (!AddC && !Shl.hasOneUse()) 14347 return SDValue(); 14348 14349 SDLoc DL(N); 14350 SDValue In = Shl.getOperand(0); 14351 14352 // If we looked through an ADD or SUB, we need to rebuild it with the shifted 14353 // constant. 14354 if (AddC) { 14355 SDValue ShiftedAddC = 14356 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64); 14357 if (IsAdd) 14358 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC); 14359 else 14360 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In); 14361 } 14362 14363 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, 14364 DAG.getValueType(MVT::i32)); 14365 if (ShAmt == 32) 14366 return SExt; 14367 14368 return DAG.getNode( 14369 ISD::SHL, DL, MVT::i64, SExt, 14370 DAG.getConstant(32 - ShAmt, DL, MVT::i64)); 14371} 14372 14373// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if 14374// the result is used as the conditon of a br_cc or select_cc we can invert, 14375// inverting the setcc is free, and Z is 0/1. Caller will invert the 14376// br_cc/select_cc. 14377static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) { 14378 bool IsAnd = Cond.getOpcode() == ISD::AND; 14379 if (!IsAnd && Cond.getOpcode() != ISD::OR) 14380 return SDValue(); 14381 14382 if (!Cond.hasOneUse()) 14383 return SDValue(); 14384 14385 SDValue Setcc = Cond.getOperand(0); 14386 SDValue Xor = Cond.getOperand(1); 14387 // Canonicalize setcc to LHS. 14388 if (Setcc.getOpcode() != ISD::SETCC) 14389 std::swap(Setcc, Xor); 14390 // LHS should be a setcc and RHS should be an xor. 14391 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() || 14392 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) 14393 return SDValue(); 14394 14395 // If the condition is an And, SimplifyDemandedBits may have changed 14396 // (xor Z, 1) to (not Z). 14397 SDValue Xor1 = Xor.getOperand(1); 14398 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1))) 14399 return SDValue(); 14400 14401 EVT VT = Cond.getValueType(); 14402 SDValue Xor0 = Xor.getOperand(0); 14403 14404 // The LHS of the xor needs to be 0/1. 14405 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 14406 if (!DAG.MaskedValueIsZero(Xor0, Mask)) 14407 return SDValue(); 14408 14409 // We can only invert integer setccs. 14410 EVT SetCCOpVT = Setcc.getOperand(0).getValueType(); 14411 if (!SetCCOpVT.isScalarInteger()) 14412 return SDValue(); 14413 14414 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); 14415 if (ISD::isIntEqualitySetCC(CCVal)) { 14416 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 14417 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0), 14418 Setcc.getOperand(1), CCVal); 14419 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) { 14420 // Invert (setlt 0, X) by converting to (setlt X, 1). 14421 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1), 14422 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal); 14423 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) { 14424 // (setlt X, 1) by converting to (setlt 0, X). 14425 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, 14426 DAG.getConstant(0, SDLoc(Setcc), VT), 14427 Setcc.getOperand(0), CCVal); 14428 } else 14429 return SDValue(); 14430 14431 unsigned Opc = IsAnd ? ISD::OR : ISD::AND; 14432 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0)); 14433} 14434 14435// Perform common combines for BR_CC and SELECT_CC condtions. 
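// e.g., (br_cc (setcc X, Y, lt), 0, ne, dest) can become (br_cc X, Y, lt, dest)
// and (select_cc (xor X, Y), 0, eq, T, F) can become (select_cc X, Y, eq, T, F).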
14436static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, 14437 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { 14438 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 14439 14440 // As far as arithmetic right shift always saves the sign, 14441 // shift can be omitted. 14442 // Fold setlt (sra X, N), 0 -> setlt X, 0 and 14443 // setge (sra X, N), 0 -> setge X, 0 14444 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && 14445 LHS.getOpcode() == ISD::SRA) { 14446 LHS = LHS.getOperand(0); 14447 return true; 14448 } 14449 14450 if (!ISD::isIntEqualitySetCC(CCVal)) 14451 return false; 14452 14453 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) 14454 // Sometimes the setcc is introduced after br_cc/select_cc has been formed. 14455 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 14456 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 14457 // If we're looking for eq 0 instead of ne 0, we need to invert the 14458 // condition. 14459 bool Invert = CCVal == ISD::SETEQ; 14460 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 14461 if (Invert) 14462 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 14463 14464 RHS = LHS.getOperand(1); 14465 LHS = LHS.getOperand(0); 14466 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 14467 14468 CC = DAG.getCondCode(CCVal); 14469 return true; 14470 } 14471 14472 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 14473 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { 14474 RHS = LHS.getOperand(1); 14475 LHS = LHS.getOperand(0); 14476 return true; 14477 } 14478 14479 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 14480 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && 14481 LHS.getOperand(1).getOpcode() == ISD::Constant) { 14482 SDValue LHS0 = LHS.getOperand(0); 14483 if (LHS0.getOpcode() == ISD::AND && 14484 LHS0.getOperand(1).getOpcode() == ISD::Constant) { 14485 uint64_t Mask = LHS0.getConstantOperandVal(1); 14486 uint64_t ShAmt = LHS.getConstantOperandVal(1); 14487 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { 14488 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 14489 CC = DAG.getCondCode(CCVal); 14490 14491 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; 14492 LHS = LHS0.getOperand(0); 14493 if (ShAmt != 0) 14494 LHS = 14495 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), 14496 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 14497 return true; 14498 } 14499 } 14500 } 14501 14502 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. 14503 // This can occur when legalizing some floating point comparisons. 14504 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 14505 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 14506 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 14507 CC = DAG.getCondCode(CCVal); 14508 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 14509 return true; 14510 } 14511 14512 if (isNullConstant(RHS)) { 14513 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { 14514 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 14515 CC = DAG.getCondCode(CCVal); 14516 LHS = NewCond; 14517 return true; 14518 } 14519 } 14520 14521 return false; 14522} 14523 14524// Fold 14525// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). 14526// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). 14527// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). 
14528// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). 14529static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, 14530 SDValue TrueVal, SDValue FalseVal, 14531 bool Swapped) { 14532 bool Commutative = true; 14533 unsigned Opc = TrueVal.getOpcode(); 14534 switch (Opc) { 14535 default: 14536 return SDValue(); 14537 case ISD::SHL: 14538 case ISD::SRA: 14539 case ISD::SRL: 14540 case ISD::SUB: 14541 Commutative = false; 14542 break; 14543 case ISD::ADD: 14544 case ISD::OR: 14545 case ISD::XOR: 14546 break; 14547 } 14548 14549 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) 14550 return SDValue(); 14551 14552 unsigned OpToFold; 14553 if (FalseVal == TrueVal.getOperand(0)) 14554 OpToFold = 0; 14555 else if (Commutative && FalseVal == TrueVal.getOperand(1)) 14556 OpToFold = 1; 14557 else 14558 return SDValue(); 14559 14560 EVT VT = N->getValueType(0); 14561 SDLoc DL(N); 14562 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); 14563 EVT OtherOpVT = OtherOp.getValueType(); 14564 SDValue IdentityOperand = 14565 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); 14566 if (!Commutative) 14567 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT); 14568 assert(IdentityOperand && "No identity operand!"); 14569 14570 if (Swapped) 14571 std::swap(OtherOp, IdentityOperand); 14572 SDValue NewSel = 14573 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand); 14574 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); 14575} 14576 14577// This tries to get rid of `select` and `icmp` that are being used to handle 14578// `Targets` that do not support `cttz(0)`/`ctlz(0)`. 14579static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { 14580 SDValue Cond = N->getOperand(0); 14581 14582 // This represents either CTTZ or CTLZ instruction. 
14583 SDValue CountZeroes; 14584 14585 SDValue ValOnZero; 14586 14587 if (Cond.getOpcode() != ISD::SETCC) 14588 return SDValue(); 14589 14590 if (!isNullConstant(Cond->getOperand(1))) 14591 return SDValue(); 14592 14593 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); 14594 if (CCVal == ISD::CondCode::SETEQ) { 14595 CountZeroes = N->getOperand(2); 14596 ValOnZero = N->getOperand(1); 14597 } else if (CCVal == ISD::CondCode::SETNE) { 14598 CountZeroes = N->getOperand(1); 14599 ValOnZero = N->getOperand(2); 14600 } else { 14601 return SDValue(); 14602 } 14603 14604 if (CountZeroes.getOpcode() == ISD::TRUNCATE || 14605 CountZeroes.getOpcode() == ISD::ZERO_EXTEND) 14606 CountZeroes = CountZeroes.getOperand(0); 14607 14608 if (CountZeroes.getOpcode() != ISD::CTTZ && 14609 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && 14610 CountZeroes.getOpcode() != ISD::CTLZ && 14611 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) 14612 return SDValue(); 14613 14614 if (!isNullConstant(ValOnZero)) 14615 return SDValue(); 14616 14617 SDValue CountZeroesArgument = CountZeroes->getOperand(0); 14618 if (Cond->getOperand(0) != CountZeroesArgument) 14619 return SDValue(); 14620 14621 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 14622 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), 14623 CountZeroes.getValueType(), CountZeroesArgument); 14624 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { 14625 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes), 14626 CountZeroes.getValueType(), CountZeroesArgument); 14627 } 14628 14629 unsigned BitWidth = CountZeroes.getValueSizeInBits(); 14630 SDValue BitWidthMinusOne = 14631 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); 14632 14633 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(), 14634 CountZeroes, BitWidthMinusOne); 14635 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); 14636} 14637 14638static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, 14639 const RISCVSubtarget &Subtarget) { 14640 SDValue Cond = N->getOperand(0); 14641 SDValue True = N->getOperand(1); 14642 SDValue False = N->getOperand(2); 14643 SDLoc DL(N); 14644 EVT VT = N->getValueType(0); 14645 EVT CondVT = Cond.getValueType(); 14646 14647 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 14648 return SDValue(); 14649 14650 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate 14651 // BEXTI, where C is power of 2. 
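  // E.g. for C = 1 << 20 the AND mask cannot be encoded as a simm12, but the
  // inverted compare can be selected as a single bexti of bit 20; swapping the
  // select operands keeps the overall result unchanged.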
  if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
      (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
      const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
      if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
        return DAG.getSelect(DL, VT,
                             DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
                             False, True);
    }
  }
  return SDValue();
}

static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
    return Folded;

  if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
    return V;

  if (Subtarget.hasConditionalMoveFusion())
    return SDValue();

  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
    return V;
  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}

/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget,
                                          const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(!VT.isScalableVector() && "unexpected build vector");

  if (VT.getVectorNumElements() == 1)
    return SDValue();

  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
  if (!TLI.isBinOp(Opcode))
    return SDValue();

  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
    return SDValue();

  SmallVector<SDValue> LHSOps;
  SmallVector<SDValue> RHSOps;
  for (SDValue Op : N->ops()) {
    if (Op.isUndef()) {
      // We can't form a divide or remainder from undef.
      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
        return SDValue();

      LHSOps.push_back(Op);
      RHSOps.push_back(Op);
      continue;
    }

    // TODO: We can handle operations which have a neutral rhs value
    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
    // of profitability in a more explicit manner.
    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
      return SDValue();

    LHSOps.push_back(Op.getOperand(0));
    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
        !isa<ConstantFPSDNode>(Op.getOperand(1)))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
14735 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) 14736 return SDValue(); 14737 RHSOps.push_back(Op.getOperand(1)); 14738 } 14739 14740 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps), 14741 DAG.getBuildVector(VT, DL, RHSOps)); 14742} 14743 14744static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, 14745 const RISCVSubtarget &Subtarget, 14746 const RISCVTargetLowering &TLI) { 14747 SDValue InVec = N->getOperand(0); 14748 SDValue InVal = N->getOperand(1); 14749 SDValue EltNo = N->getOperand(2); 14750 SDLoc DL(N); 14751 14752 EVT VT = InVec.getValueType(); 14753 if (VT.isScalableVector()) 14754 return SDValue(); 14755 14756 if (!InVec.hasOneUse()) 14757 return SDValue(); 14758 14759 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt 14760 // move the insert_vector_elts into the arms of the binop. Note that 14761 // the new RHS must be a constant. 14762 const unsigned InVecOpcode = InVec->getOpcode(); 14763 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) && 14764 InVal.hasOneUse()) { 14765 SDValue InVecLHS = InVec->getOperand(0); 14766 SDValue InVecRHS = InVec->getOperand(1); 14767 SDValue InValLHS = InVal->getOperand(0); 14768 SDValue InValRHS = InVal->getOperand(1); 14769 14770 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode())) 14771 return SDValue(); 14772 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS)) 14773 return SDValue(); 14774 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 14775 // have different LHS and RHS types. 14776 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType()) 14777 return SDValue(); 14778 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 14779 InVecLHS, InValLHS, EltNo); 14780 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 14781 InVecRHS, InValRHS, EltNo); 14782 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS); 14783 } 14784 14785 // Given insert_vector_elt (concat_vectors ...), InVal, Elt 14786 // move the insert_vector_elt to the source operand of the concat_vector. 14787 if (InVec.getOpcode() != ISD::CONCAT_VECTORS) 14788 return SDValue(); 14789 14790 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); 14791 if (!IndexC) 14792 return SDValue(); 14793 unsigned Elt = IndexC->getZExtValue(); 14794 14795 EVT ConcatVT = InVec.getOperand(0).getValueType(); 14796 if (ConcatVT.getVectorElementType() != InVal.getValueType()) 14797 return SDValue(); 14798 unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); 14799 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL, 14800 EltNo.getValueType()); 14801 14802 unsigned ConcatOpIdx = Elt / ConcatNumElts; 14803 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx); 14804 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT, 14805 ConcatOp, InVal, NewIdx); 14806 14807 SmallVector<SDValue> ConcatOps; 14808 ConcatOps.append(InVec->op_begin(), InVec->op_end()); 14809 ConcatOps[ConcatOpIdx] = ConcatOp; 14810 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); 14811} 14812 14813// If we're concatenating a series of vector loads like 14814// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 
14815// Then we can turn this into a strided load by widening the vector elements 14816// vlse32 p, stride=n 14817static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, 14818 const RISCVSubtarget &Subtarget, 14819 const RISCVTargetLowering &TLI) { 14820 SDLoc DL(N); 14821 EVT VT = N->getValueType(0); 14822 14823 // Only perform this combine on legal MVTs. 14824 if (!TLI.isTypeLegal(VT)) 14825 return SDValue(); 14826 14827 // TODO: Potentially extend this to scalable vectors 14828 if (VT.isScalableVector()) 14829 return SDValue(); 14830 14831 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0)); 14832 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) || 14833 !SDValue(BaseLd, 0).hasOneUse()) 14834 return SDValue(); 14835 14836 EVT BaseLdVT = BaseLd->getValueType(0); 14837 14838 // Go through the loads and check that they're strided 14839 SmallVector<LoadSDNode *> Lds; 14840 Lds.push_back(BaseLd); 14841 Align Align = BaseLd->getAlign(); 14842 for (SDValue Op : N->ops().drop_front()) { 14843 auto *Ld = dyn_cast<LoadSDNode>(Op); 14844 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || 14845 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) || 14846 Ld->getValueType(0) != BaseLdVT) 14847 return SDValue(); 14848 14849 Lds.push_back(Ld); 14850 14851 // The common alignment is the most restrictive (smallest) of all the loads 14852 Align = std::min(Align, Ld->getAlign()); 14853 } 14854 14855 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; 14856 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, 14857 LoadSDNode *Ld2) -> std::optional<PtrDiff> { 14858 // If the load ptrs can be decomposed into a common (Base + Index) with a 14859 // common constant stride, then return the constant stride. 14860 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); 14861 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); 14862 if (BIO1.equalBaseIndex(BIO2, DAG)) 14863 return {{BIO2.getOffset() - BIO1.getOffset(), false}}; 14864 14865 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) 14866 SDValue P1 = Ld1->getBasePtr(); 14867 SDValue P2 = Ld2->getBasePtr(); 14868 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) 14869 return {{P2.getOperand(1), false}}; 14870 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) 14871 return {{P1.getOperand(1), true}}; 14872 14873 return std::nullopt; 14874 }; 14875 14876 // Get the distance between the first and second loads 14877 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); 14878 if (!BaseDiff) 14879 return SDValue(); 14880 14881 // Check all the loads are the same distance apart 14882 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) 14883 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) 14884 return SDValue(); 14885 14886 // TODO: At this point, we've successfully matched a generalized gather 14887 // load. Maybe we should emit that, and then move the specialized 14888 // matchers above and below into a DAG combine? 14889 14890 // Get the widened scalar type, e.g. v4i8 -> i64 14891 unsigned WideScalarBitWidth = 14892 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); 14893 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth); 14894 14895 // Get the vector type for the strided load, e.g. 
4 x v4i8 -> v4i64 14896 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands()); 14897 if (!TLI.isTypeLegal(WideVecVT)) 14898 return SDValue(); 14899 14900 // Check that the operation is legal 14901 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) 14902 return SDValue(); 14903 14904 auto [StrideVariant, MustNegateStride] = *BaseDiff; 14905 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant) 14906 ? std::get<SDValue>(StrideVariant) 14907 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL, 14908 Lds[0]->getOffset().getValueType()); 14909 if (MustNegateStride) 14910 Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); 14911 14912 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); 14913 SDValue IntID = 14914 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, 14915 Subtarget.getXLenVT()); 14916 14917 SDValue AllOneMask = 14918 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, 14919 DAG.getConstant(1, DL, MVT::i1)); 14920 14921 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT), 14922 BaseLd->getBasePtr(), Stride, AllOneMask}; 14923 14924 uint64_t MemSize; 14925 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); 14926 ConstStride && ConstStride->getSExtValue() >= 0) 14927 // total size = (elsize * n) + (stride - elsize) * (n-1) 14928 // = elsize + stride * (n-1) 14929 MemSize = WideScalarVT.getSizeInBits() + 14930 ConstStride->getSExtValue() * (N->getNumOperands() - 1); 14931 else 14932 // If Stride isn't constant, then we can't know how much it will load 14933 MemSize = MemoryLocation::UnknownSize; 14934 14935 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 14936 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize, 14937 Align); 14938 14939 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, 14940 Ops, WideVecVT, MMO); 14941 for (SDValue Ld : N->ops()) 14942 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); 14943 14944 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad); 14945} 14946 14947static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, 14948 const RISCVSubtarget &Subtarget) { 14949 14950 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); 14951 14952 if (N->getValueType(0).isFixedLengthVector()) 14953 return SDValue(); 14954 14955 SDValue Addend = N->getOperand(0); 14956 SDValue MulOp = N->getOperand(1); 14957 14958 if (N->getOpcode() == RISCVISD::ADD_VL) { 14959 SDValue AddMergeOp = N->getOperand(2); 14960 if (!AddMergeOp.isUndef()) 14961 return SDValue(); 14962 } 14963 14964 auto IsVWMulOpc = [](unsigned Opc) { 14965 switch (Opc) { 14966 case RISCVISD::VWMUL_VL: 14967 case RISCVISD::VWMULU_VL: 14968 case RISCVISD::VWMULSU_VL: 14969 return true; 14970 default: 14971 return false; 14972 } 14973 }; 14974 14975 if (!IsVWMulOpc(MulOp.getOpcode())) 14976 std::swap(Addend, MulOp); 14977 14978 if (!IsVWMulOpc(MulOp.getOpcode())) 14979 return SDValue(); 14980 14981 SDValue MulMergeOp = MulOp.getOperand(2); 14982 14983 if (!MulMergeOp.isUndef()) 14984 return SDValue(); 14985 14986 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, 14987 const RISCVSubtarget &Subtarget) { 14988 if (N->getOpcode() == ISD::ADD) { 14989 SDLoc DL(N); 14990 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, 14991 Subtarget); 14992 } 14993 return std::make_pair(N->getOperand(3), N->getOperand(4)); 14994 }(N, DAG, Subtarget); 14995 14996 SDValue MulMask = MulOp.getOperand(3); 14997 SDValue 
MulVL = MulOp.getOperand(4); 14998 14999 if (AddMask != MulMask || AddVL != MulVL) 15000 return SDValue(); 15001 15002 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; 15003 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, 15004 "Unexpected opcode after VWMACC_VL"); 15005 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, 15006 "Unexpected opcode after VWMACC_VL!"); 15007 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, 15008 "Unexpected opcode after VWMUL_VL!"); 15009 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, 15010 "Unexpected opcode after VWMUL_VL!"); 15011 15012 SDLoc DL(N); 15013 EVT VT = N->getValueType(0); 15014 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask, 15015 AddVL}; 15016 return DAG.getNode(Opc, DL, VT, Ops); 15017} 15018 15019static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, 15020 ISD::MemIndexType &IndexType, 15021 RISCVTargetLowering::DAGCombinerInfo &DCI) { 15022 if (!DCI.isBeforeLegalize()) 15023 return false; 15024 15025 SelectionDAG &DAG = DCI.DAG; 15026 const MVT XLenVT = 15027 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); 15028 15029 const EVT IndexVT = Index.getValueType(); 15030 15031 // RISC-V indexed loads only support the "unsigned unscaled" addressing 15032 // mode, so anything else must be manually legalized. 15033 if (!isIndexTypeSigned(IndexType)) 15034 return false; 15035 15036 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 15037 // Any index legalization should first promote to XLenVT, so we don't lose 15038 // bits when scaling. This may create an illegal index type so we let 15039 // LLVM's legalization take care of the splitting. 15040 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 15041 Index = DAG.getNode(ISD::SIGN_EXTEND, DL, 15042 IndexVT.changeVectorElementType(XLenVT), Index); 15043 } 15044 IndexType = ISD::UNSIGNED_SCALED; 15045 return true; 15046} 15047 15048/// Match the index vector of a scatter or gather node as the shuffle mask 15049/// which performs the rearrangement if possible. Will only match if 15050/// all lanes are touched, and thus replacing the scatter or gather with 15051/// a unit strided access and shuffle is legal. 15052static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, 15053 SmallVector<int> &ShuffleMask) { 15054 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) 15055 return false; 15056 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) 15057 return false; 15058 15059 const unsigned ElementSize = VT.getScalarStoreSize(); 15060 const unsigned NumElems = VT.getVectorNumElements(); 15061 15062 // Create the shuffle mask and check all bits active 15063 assert(ShuffleMask.empty()); 15064 BitVector ActiveLanes(NumElems); 15065 for (unsigned i = 0; i < Index->getNumOperands(); i++) { 15066 // TODO: We've found an active bit of UB, and could be 15067 // more aggressive here if desired. 15068 if (Index->getOperand(i)->isUndef()) 15069 return false; 15070 uint64_t C = Index->getConstantOperandVal(i); 15071 if (C % ElementSize != 0) 15072 return false; 15073 C = C / ElementSize; 15074 if (C >= NumElems) 15075 return false; 15076 ShuffleMask.push_back(C); 15077 ActiveLanes.set(C); 15078 } 15079 return ActiveLanes.all(); 15080} 15081 15082/// Match the index of a gather or scatter operation as an operation 15083/// with twice the element width and half the number of elements. 
This is 15084/// generally profitable (if legal) because these operations are linear 15085/// in VL, so even if we cause some extract VTYPE/VL toggles, we still 15086/// come out ahead. 15087static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, 15088 Align BaseAlign, const RISCVSubtarget &ST) { 15089 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) 15090 return false; 15091 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) 15092 return false; 15093 15094 // Attempt a doubling. If we can use a element type 4x or 8x in 15095 // size, this will happen via multiply iterations of the transform. 15096 const unsigned NumElems = VT.getVectorNumElements(); 15097 if (NumElems % 2 != 0) 15098 return false; 15099 15100 const unsigned ElementSize = VT.getScalarStoreSize(); 15101 const unsigned WiderElementSize = ElementSize * 2; 15102 if (WiderElementSize > ST.getELen()/8) 15103 return false; 15104 15105 if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize) 15106 return false; 15107 15108 for (unsigned i = 0; i < Index->getNumOperands(); i++) { 15109 // TODO: We've found an active bit of UB, and could be 15110 // more aggressive here if desired. 15111 if (Index->getOperand(i)->isUndef()) 15112 return false; 15113 // TODO: This offset check is too strict if we support fully 15114 // misaligned memory operations. 15115 uint64_t C = Index->getConstantOperandVal(i); 15116 if (i % 2 == 0) { 15117 if (C % WiderElementSize != 0) 15118 return false; 15119 continue; 15120 } 15121 uint64_t Last = Index->getConstantOperandVal(i-1); 15122 if (C != Last + ElementSize) 15123 return false; 15124 } 15125 return true; 15126} 15127 15128 15129SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 15130 DAGCombinerInfo &DCI) const { 15131 SelectionDAG &DAG = DCI.DAG; 15132 const MVT XLenVT = Subtarget.getXLenVT(); 15133 SDLoc DL(N); 15134 15135 // Helper to call SimplifyDemandedBits on an operand of N where only some low 15136 // bits are demanded. N will be added to the Worklist if it was not deleted. 15137 // Caller should return SDValue(N, 0) if this returns true. 15138 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { 15139 SDValue Op = N->getOperand(OpNo); 15140 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); 15141 if (!SimplifyDemandedBits(Op, Mask, DCI)) 15142 return false; 15143 15144 if (N->getOpcode() != ISD::DELETED_NODE) 15145 DCI.AddToWorklist(N); 15146 return true; 15147 }; 15148 15149 switch (N->getOpcode()) { 15150 default: 15151 break; 15152 case RISCVISD::SplitF64: { 15153 SDValue Op0 = N->getOperand(0); 15154 // If the input to SplitF64 is just BuildPairF64 then the operation is 15155 // redundant. Instead, use BuildPairF64's operands directly. 15156 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 15157 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 15158 15159 if (Op0->isUndef()) { 15160 SDValue Lo = DAG.getUNDEF(MVT::i32); 15161 SDValue Hi = DAG.getUNDEF(MVT::i32); 15162 return DCI.CombineTo(N, Lo, Hi); 15163 } 15164 15165 // It's cheaper to materialise two 32-bit integers than to load a double 15166 // from the constant pool and transfer it to integer registers through the 15167 // stack. 
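    // For example, the f64 constant 1.0 (0x3ff0000000000000) splits into the
    // pair (0x00000000, 0x3ff00000), each half needing at most one
    // instruction to materialise.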
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::RORW:
  case RISCVISD::ROLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    if (SimplifyDemandedLowBitsHelper(0, 32) ||
        SimplifyDemandedLowBitsHelper(1, 5))
      return SDValue(N, 0);

    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read.
    if (SimplifyDemandedLowBitsHelper(0, 32))
      return SDValue(N, 0);
    break;
  }
  case RISCVISD::FMV_W_X_RV64: {
    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
    // conversion is unnecessary and can be replaced with the
    // FMV_X_ANYEXTW_RV64 operand.
    SDValue Op0 = N->getOperand(0);
    if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
      return Op0.getOperand(0);
    break;
  }
  case RISCVISD::FMV_X_ANYEXTH:
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    MVT VT = N->getSimpleValueType(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
    // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
    if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
         Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
        (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
         Op0->getOpcode() == RISCVISD::FMV_H_X)) {
      assert(Op0.getOperand(0).getValueType() == VT &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST.
It performs the equivalent of: 15245 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 15246 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 15247 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 15248 !Op0.getNode()->hasOneUse()) 15249 break; 15250 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 15251 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; 15252 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); 15253 if (Op0.getOpcode() == ISD::FNEG) 15254 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 15255 DAG.getConstant(SignBit, DL, VT)); 15256 15257 assert(Op0.getOpcode() == ISD::FABS); 15258 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 15259 DAG.getConstant(~SignBit, DL, VT)); 15260 } 15261 case ISD::ADD: { 15262 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15263 return V; 15264 if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) 15265 return V; 15266 return performADDCombine(N, DAG, Subtarget); 15267 } 15268 case ISD::SUB: { 15269 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15270 return V; 15271 return performSUBCombine(N, DAG, Subtarget); 15272 } 15273 case ISD::AND: 15274 return performANDCombine(N, DCI, Subtarget); 15275 case ISD::OR: 15276 return performORCombine(N, DCI, Subtarget); 15277 case ISD::XOR: 15278 return performXORCombine(N, DAG, Subtarget); 15279 case ISD::MUL: 15280 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15281 return V; 15282 return performMULCombine(N, DAG); 15283 case ISD::FADD: 15284 case ISD::UMAX: 15285 case ISD::UMIN: 15286 case ISD::SMAX: 15287 case ISD::SMIN: 15288 case ISD::FMAXNUM: 15289 case ISD::FMINNUM: { 15290 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 15291 return V; 15292 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) 15293 return V; 15294 return SDValue(); 15295 } 15296 case ISD::SETCC: 15297 return performSETCCCombine(N, DAG, Subtarget); 15298 case ISD::SIGN_EXTEND_INREG: 15299 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); 15300 case ISD::ZERO_EXTEND: 15301 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 15302 // type legalization. This is safe because fp_to_uint produces poison if 15303 // it overflows. 15304 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { 15305 SDValue Src = N->getOperand(0); 15306 if (Src.getOpcode() == ISD::FP_TO_UINT && 15307 isTypeLegal(Src.getOperand(0).getValueType())) 15308 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 15309 Src.getOperand(0)); 15310 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && 15311 isTypeLegal(Src.getOperand(1).getValueType())) { 15312 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 15313 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, 15314 Src.getOperand(0), Src.getOperand(1)); 15315 DCI.CombineTo(N, Res); 15316 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); 15317 DCI.recursivelyDeleteUnusedNodes(Src.getNode()); 15318 return SDValue(N, 0); // Return N so it doesn't get rechecked. 15319 } 15320 } 15321 return SDValue(); 15322 case RISCVISD::TRUNCATE_VECTOR_VL: { 15323 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) 15324 // This would be benefit for the cases where X and Y are both the same value 15325 // type of low precision vectors. 
Since the truncate would be lowered into 15326 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate 15327 // restriction, such pattern would be expanded into a series of "vsetvli" 15328 // and "vnsrl" instructions later to reach this point. 15329 auto IsTruncNode = [](SDValue V) { 15330 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) 15331 return false; 15332 SDValue VL = V.getOperand(2); 15333 auto *C = dyn_cast<ConstantSDNode>(VL); 15334 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand 15335 bool IsVLMAXForVMSET = (C && C->isAllOnes()) || 15336 (isa<RegisterSDNode>(VL) && 15337 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); 15338 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && 15339 IsVLMAXForVMSET; 15340 }; 15341 15342 SDValue Op = N->getOperand(0); 15343 15344 // We need to first find the inner level of TRUNCATE_VECTOR_VL node 15345 // to distinguish such pattern. 15346 while (IsTruncNode(Op)) { 15347 if (!Op.hasOneUse()) 15348 return SDValue(); 15349 Op = Op.getOperand(0); 15350 } 15351 15352 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { 15353 SDValue N0 = Op.getOperand(0); 15354 SDValue N1 = Op.getOperand(1); 15355 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && 15356 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { 15357 SDValue N00 = N0.getOperand(0); 15358 SDValue N10 = N1.getOperand(0); 15359 if (N00.getValueType().isVector() && 15360 N00.getValueType() == N10.getValueType() && 15361 N->getValueType(0) == N10.getValueType()) { 15362 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; 15363 SDValue SMin = DAG.getNode( 15364 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, 15365 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); 15366 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); 15367 } 15368 } 15369 } 15370 break; 15371 } 15372 case ISD::TRUNCATE: 15373 return performTRUNCATECombine(N, DAG, Subtarget); 15374 case ISD::SELECT: 15375 return performSELECTCombine(N, DAG, Subtarget); 15376 case RISCVISD::CZERO_EQZ: 15377 case RISCVISD::CZERO_NEZ: 15378 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1. 15379 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1. 15380 if (N->getOperand(1).getOpcode() == ISD::XOR && 15381 isOneConstant(N->getOperand(1).getOperand(1))) { 15382 SDValue Cond = N->getOperand(1).getOperand(0); 15383 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1); 15384 if (DAG.MaskedValueIsZero(Cond, Mask)) { 15385 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ 15386 ? RISCVISD::CZERO_NEZ 15387 : RISCVISD::CZERO_EQZ; 15388 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), 15389 N->getOperand(0), Cond); 15390 } 15391 } 15392 return SDValue(); 15393 15394 case RISCVISD::SELECT_CC: { 15395 // Transform 15396 SDValue LHS = N->getOperand(0); 15397 SDValue RHS = N->getOperand(1); 15398 SDValue CC = N->getOperand(2); 15399 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 15400 SDValue TrueV = N->getOperand(3); 15401 SDValue FalseV = N->getOperand(4); 15402 SDLoc DL(N); 15403 EVT VT = N->getValueType(0); 15404 15405 // If the True and False values are the same, we don't need a select_cc. 
15406 if (TrueV == FalseV) 15407 return TrueV; 15408 15409 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 15410 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 15411 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && 15412 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && 15413 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { 15414 if (CCVal == ISD::CondCode::SETGE) 15415 std::swap(TrueV, FalseV); 15416 15417 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); 15418 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); 15419 // Only handle simm12, if it is not in this range, it can be considered as 15420 // register. 15421 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && 15422 isInt<12>(TrueSImm - FalseSImm)) { 15423 SDValue SRA = 15424 DAG.getNode(ISD::SRA, DL, VT, LHS, 15425 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); 15426 SDValue AND = 15427 DAG.getNode(ISD::AND, DL, VT, SRA, 15428 DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); 15429 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); 15430 } 15431 15432 if (CCVal == ISD::CondCode::SETGE) 15433 std::swap(TrueV, FalseV); 15434 } 15435 15436 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 15437 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 15438 {LHS, RHS, CC, TrueV, FalseV}); 15439 15440 if (!Subtarget.hasConditionalMoveFusion()) { 15441 // (select c, -1, y) -> -c | y 15442 if (isAllOnesConstant(TrueV)) { 15443 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 15444 SDValue Neg = DAG.getNegative(C, DL, VT); 15445 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 15446 } 15447 // (select c, y, -1) -> -!c | y 15448 if (isAllOnesConstant(FalseV)) { 15449 SDValue C = 15450 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 15451 SDValue Neg = DAG.getNegative(C, DL, VT); 15452 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 15453 } 15454 15455 // (select c, 0, y) -> -!c & y 15456 if (isNullConstant(TrueV)) { 15457 SDValue C = 15458 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 15459 SDValue Neg = DAG.getNegative(C, DL, VT); 15460 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 15461 } 15462 // (select c, y, 0) -> -c & y 15463 if (isNullConstant(FalseV)) { 15464 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 15465 SDValue Neg = DAG.getNegative(C, DL, VT); 15466 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 15467 } 15468 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 15469 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 15470 if (((isOneConstant(FalseV) && LHS == TrueV && 15471 CCVal == ISD::CondCode::SETNE) || 15472 (isOneConstant(TrueV) && LHS == FalseV && 15473 CCVal == ISD::CondCode::SETEQ)) && 15474 isNullConstant(RHS)) { 15475 // freeze it to be safe. 15476 LHS = DAG.getFreeze(LHS); 15477 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ); 15478 return DAG.getNode(ISD::ADD, DL, VT, LHS, C); 15479 } 15480 } 15481 15482 // If both true/false are an xor with 1, pull through the select. 15483 // This can occur after op legalization if both operands are setccs that 15484 // require an xor to invert. 15485 // FIXME: Generalize to other binary ops with identical operand? 
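    // (select_cc x, y, cc, (xor a, 1), (xor b, 1))
    //   -> (xor (select_cc x, y, cc, a, b), 1)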
15486 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && 15487 TrueV.getOperand(1) == FalseV.getOperand(1) && 15488 isOneConstant(TrueV.getOperand(1)) && 15489 TrueV.hasOneUse() && FalseV.hasOneUse()) { 15490 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC, 15491 TrueV.getOperand(0), FalseV.getOperand(0)); 15492 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1)); 15493 } 15494 15495 return SDValue(); 15496 } 15497 case RISCVISD::BR_CC: { 15498 SDValue LHS = N->getOperand(1); 15499 SDValue RHS = N->getOperand(2); 15500 SDValue CC = N->getOperand(3); 15501 SDLoc DL(N); 15502 15503 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 15504 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 15505 N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); 15506 15507 return SDValue(); 15508 } 15509 case ISD::BITREVERSE: 15510 return performBITREVERSECombine(N, DAG, Subtarget); 15511 case ISD::FP_TO_SINT: 15512 case ISD::FP_TO_UINT: 15513 return performFP_TO_INTCombine(N, DCI, Subtarget); 15514 case ISD::FP_TO_SINT_SAT: 15515 case ISD::FP_TO_UINT_SAT: 15516 return performFP_TO_INT_SATCombine(N, DCI, Subtarget); 15517 case ISD::FCOPYSIGN: { 15518 EVT VT = N->getValueType(0); 15519 if (!VT.isVector()) 15520 break; 15521 // There is a form of VFSGNJ which injects the negated sign of its second 15522 // operand. Try and bubble any FNEG up after the extend/round to produce 15523 // this optimized pattern. Avoid modifying cases where FP_ROUND and 15524 // TRUNC=1. 15525 SDValue In2 = N->getOperand(1); 15526 // Avoid cases where the extend/round has multiple uses, as duplicating 15527 // those is typically more expensive than removing a fneg. 15528 if (!In2.hasOneUse()) 15529 break; 15530 if (In2.getOpcode() != ISD::FP_EXTEND && 15531 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 15532 break; 15533 In2 = In2.getOperand(0); 15534 if (In2.getOpcode() != ISD::FNEG) 15535 break; 15536 SDLoc DL(N); 15537 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 15538 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 15539 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 15540 } 15541 case ISD::MGATHER: { 15542 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N); 15543 const EVT VT = N->getValueType(0); 15544 SDValue Index = MGN->getIndex(); 15545 SDValue ScaleOp = MGN->getScale(); 15546 ISD::MemIndexType IndexType = MGN->getIndexType(); 15547 assert(!MGN->isIndexScaled() && 15548 "Scaled gather/scatter should not be formed"); 15549 15550 SDLoc DL(N); 15551 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15552 return DAG.getMaskedGather( 15553 N->getVTList(), MGN->getMemoryVT(), DL, 15554 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 15555 MGN->getBasePtr(), Index, ScaleOp}, 15556 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 15557 15558 if (narrowIndex(Index, IndexType, DAG)) 15559 return DAG.getMaskedGather( 15560 N->getVTList(), MGN->getMemoryVT(), DL, 15561 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 15562 MGN->getBasePtr(), Index, ScaleOp}, 15563 MGN->getMemOperand(), IndexType, MGN->getExtensionType()); 15564 15565 if (Index.getOpcode() == ISD::BUILD_VECTOR && 15566 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { 15567 // The sequence will be XLenVT, not the type of Index. Tell 15568 // isSimpleVIDSequence this so we avoid overflow. 
15569 if (std::optional<VIDSequence> SimpleVID = 15570 isSimpleVIDSequence(Index, Subtarget.getXLen()); 15571 SimpleVID && SimpleVID->StepDenominator == 1) { 15572 const int64_t StepNumerator = SimpleVID->StepNumerator; 15573 const int64_t Addend = SimpleVID->Addend; 15574 15575 // Note: We don't need to check alignment here since (by assumption 15576 // from the existance of the gather), our offsets must be sufficiently 15577 // aligned. 15578 15579 const EVT PtrVT = getPointerTy(DAG.getDataLayout()); 15580 assert(MGN->getBasePtr()->getValueType(0) == PtrVT); 15581 assert(IndexType == ISD::UNSIGNED_SCALED); 15582 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(), 15583 DAG.getConstant(Addend, DL, PtrVT)); 15584 15585 SDVTList VTs = DAG.getVTList({VT, MVT::Other}); 15586 SDValue IntID = 15587 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, 15588 XLenVT); 15589 SDValue Ops[] = 15590 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr, 15591 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()}; 15592 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, 15593 Ops, VT, MGN->getMemOperand()); 15594 } 15595 } 15596 15597 SmallVector<int> ShuffleMask; 15598 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 15599 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) { 15600 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(), 15601 MGN->getBasePtr(), DAG.getUNDEF(XLenVT), 15602 MGN->getMask(), DAG.getUNDEF(VT), 15603 MGN->getMemoryVT(), MGN->getMemOperand(), 15604 ISD::UNINDEXED, ISD::NON_EXTLOAD); 15605 SDValue Shuffle = 15606 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask); 15607 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL); 15608 } 15609 15610 if (MGN->getExtensionType() == ISD::NON_EXTLOAD && 15611 matchIndexAsWiderOp(VT, Index, MGN->getMask(), 15612 MGN->getMemOperand()->getBaseAlign(), Subtarget)) { 15613 SmallVector<SDValue> NewIndices; 15614 for (unsigned i = 0; i < Index->getNumOperands(); i += 2) 15615 NewIndices.push_back(Index.getOperand(i)); 15616 EVT IndexVT = Index.getValueType() 15617 .getHalfNumVectorElementsVT(*DAG.getContext()); 15618 Index = DAG.getBuildVector(IndexVT, DL, NewIndices); 15619 15620 unsigned ElementSize = VT.getScalarStoreSize(); 15621 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2); 15622 auto EltCnt = VT.getVectorElementCount(); 15623 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); 15624 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT, 15625 EltCnt.divideCoefficientBy(2)); 15626 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru()); 15627 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 15628 EltCnt.divideCoefficientBy(2)); 15629 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); 15630 15631 SDValue Gather = 15632 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, 15633 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), 15634 Index, ScaleOp}, 15635 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); 15636 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0)); 15637 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL); 15638 } 15639 break; 15640 } 15641 case ISD::MSCATTER:{ 15642 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N); 15643 SDValue Index = MSN->getIndex(); 15644 SDValue ScaleOp = MSN->getScale(); 15645 ISD::MemIndexType IndexType = MSN->getIndexType(); 15646 assert(!MSN->isIndexScaled() && 15647 "Scaled gather/scatter should not be formed"); 
15648 15649 SDLoc DL(N); 15650 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15651 return DAG.getMaskedScatter( 15652 N->getVTList(), MSN->getMemoryVT(), DL, 15653 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 15654 Index, ScaleOp}, 15655 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 15656 15657 if (narrowIndex(Index, IndexType, DAG)) 15658 return DAG.getMaskedScatter( 15659 N->getVTList(), MSN->getMemoryVT(), DL, 15660 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 15661 Index, ScaleOp}, 15662 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); 15663 15664 EVT VT = MSN->getValue()->getValueType(0); 15665 SmallVector<int> ShuffleMask; 15666 if (!MSN->isTruncatingStore() && 15667 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) { 15668 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(), 15669 DAG.getUNDEF(VT), ShuffleMask); 15670 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(), 15671 DAG.getUNDEF(XLenVT), MSN->getMask(), 15672 MSN->getMemoryVT(), MSN->getMemOperand(), 15673 ISD::UNINDEXED, false); 15674 } 15675 break; 15676 } 15677 case ISD::VP_GATHER: { 15678 const auto *VPGN = dyn_cast<VPGatherSDNode>(N); 15679 SDValue Index = VPGN->getIndex(); 15680 SDValue ScaleOp = VPGN->getScale(); 15681 ISD::MemIndexType IndexType = VPGN->getIndexType(); 15682 assert(!VPGN->isIndexScaled() && 15683 "Scaled gather/scatter should not be formed"); 15684 15685 SDLoc DL(N); 15686 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15687 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 15688 {VPGN->getChain(), VPGN->getBasePtr(), Index, 15689 ScaleOp, VPGN->getMask(), 15690 VPGN->getVectorLength()}, 15691 VPGN->getMemOperand(), IndexType); 15692 15693 if (narrowIndex(Index, IndexType, DAG)) 15694 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 15695 {VPGN->getChain(), VPGN->getBasePtr(), Index, 15696 ScaleOp, VPGN->getMask(), 15697 VPGN->getVectorLength()}, 15698 VPGN->getMemOperand(), IndexType); 15699 15700 break; 15701 } 15702 case ISD::VP_SCATTER: { 15703 const auto *VPSN = dyn_cast<VPScatterSDNode>(N); 15704 SDValue Index = VPSN->getIndex(); 15705 SDValue ScaleOp = VPSN->getScale(); 15706 ISD::MemIndexType IndexType = VPSN->getIndexType(); 15707 assert(!VPSN->isIndexScaled() && 15708 "Scaled gather/scatter should not be formed"); 15709 15710 SDLoc DL(N); 15711 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) 15712 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 15713 {VPSN->getChain(), VPSN->getValue(), 15714 VPSN->getBasePtr(), Index, ScaleOp, 15715 VPSN->getMask(), VPSN->getVectorLength()}, 15716 VPSN->getMemOperand(), IndexType); 15717 15718 if (narrowIndex(Index, IndexType, DAG)) 15719 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 15720 {VPSN->getChain(), VPSN->getValue(), 15721 VPSN->getBasePtr(), Index, ScaleOp, 15722 VPSN->getMask(), VPSN->getVectorLength()}, 15723 VPSN->getMemOperand(), IndexType); 15724 break; 15725 } 15726 case RISCVISD::SRA_VL: 15727 case RISCVISD::SRL_VL: 15728 case RISCVISD::SHL_VL: { 15729 SDValue ShAmt = N->getOperand(1); 15730 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 15731 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 
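      // Rebuild the splat with VMV_V_X_VL from just the scalar's low half,
      // keeping the original VL.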
15732 SDLoc DL(N); 15733 SDValue VL = N->getOperand(4); 15734 EVT VT = N->getValueType(0); 15735 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 15736 ShAmt.getOperand(1), VL); 15737 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 15738 N->getOperand(2), N->getOperand(3), N->getOperand(4)); 15739 } 15740 break; 15741 } 15742 case ISD::SRA: 15743 if (SDValue V = performSRACombine(N, DAG, Subtarget)) 15744 return V; 15745 [[fallthrough]]; 15746 case ISD::SRL: 15747 case ISD::SHL: { 15748 SDValue ShAmt = N->getOperand(1); 15749 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 15750 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 15751 SDLoc DL(N); 15752 EVT VT = N->getValueType(0); 15753 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 15754 ShAmt.getOperand(1), 15755 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); 15756 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 15757 } 15758 break; 15759 } 15760 case RISCVISD::ADD_VL: 15761 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) 15762 return V; 15763 return combineToVWMACC(N, DAG, Subtarget); 15764 case RISCVISD::SUB_VL: 15765 case RISCVISD::VWADD_W_VL: 15766 case RISCVISD::VWADDU_W_VL: 15767 case RISCVISD::VWSUB_W_VL: 15768 case RISCVISD::VWSUBU_W_VL: 15769 case RISCVISD::MUL_VL: 15770 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); 15771 case RISCVISD::VFMADD_VL: 15772 case RISCVISD::VFNMADD_VL: 15773 case RISCVISD::VFMSUB_VL: 15774 case RISCVISD::VFNMSUB_VL: 15775 case RISCVISD::STRICT_VFMADD_VL: 15776 case RISCVISD::STRICT_VFNMADD_VL: 15777 case RISCVISD::STRICT_VFMSUB_VL: 15778 case RISCVISD::STRICT_VFNMSUB_VL: 15779 return performVFMADD_VLCombine(N, DAG, Subtarget); 15780 case RISCVISD::FMUL_VL: 15781 return performVFMUL_VLCombine(N, DAG, Subtarget); 15782 case RISCVISD::FADD_VL: 15783 case RISCVISD::FSUB_VL: 15784 return performFADDSUB_VLCombine(N, DAG, Subtarget); 15785 case ISD::LOAD: 15786 case ISD::STORE: { 15787 if (DCI.isAfterLegalizeDAG()) 15788 if (SDValue V = performMemPairCombine(N, DCI)) 15789 return V; 15790 15791 if (N->getOpcode() != ISD::STORE) 15792 break; 15793 15794 auto *Store = cast<StoreSDNode>(N); 15795 SDValue Chain = Store->getChain(); 15796 EVT MemVT = Store->getMemoryVT(); 15797 SDValue Val = Store->getValue(); 15798 SDLoc DL(N); 15799 15800 bool IsScalarizable = 15801 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) && 15802 Store->isSimple() && 15803 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && 15804 isPowerOf2_64(MemVT.getSizeInBits()) && 15805 MemVT.getSizeInBits() <= Subtarget.getXLen(); 15806 15807 // If sufficiently aligned we can scalarize stores of constant vectors of 15808 // any power-of-two size up to XLen bits, provided that they aren't too 15809 // expensive to materialize. 
15810 // vsetivli zero, 2, e8, m1, ta, ma 15811 // vmv.v.i v8, 4 15812 // vse64.v v8, (a0) 15813 // -> 15814 // li a1, 1028 15815 // sh a1, 0(a0) 15816 if (DCI.isBeforeLegalize() && IsScalarizable && 15817 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { 15818 // Get the constant vector bits 15819 APInt NewC(Val.getValueSizeInBits(), 0); 15820 uint64_t EltSize = Val.getScalarValueSizeInBits(); 15821 for (unsigned i = 0; i < Val.getNumOperands(); i++) { 15822 if (Val.getOperand(i).isUndef()) 15823 continue; 15824 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize), 15825 i * EltSize); 15826 } 15827 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 15828 15829 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, 15830 true) <= 2 && 15831 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15832 NewVT, *Store->getMemOperand())) { 15833 SDValue NewV = DAG.getConstant(NewC, DL, NewVT); 15834 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), 15835 Store->getPointerInfo(), Store->getOriginalAlign(), 15836 Store->getMemOperand()->getFlags()); 15837 } 15838 } 15839 15840 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 15841 // vsetivli zero, 2, e16, m1, ta, ma 15842 // vle16.v v8, (a0) 15843 // vse16.v v8, (a1) 15844 if (auto *L = dyn_cast<LoadSDNode>(Val); 15845 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && 15846 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) && 15847 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) && 15848 L->getMemoryVT() == MemVT) { 15849 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); 15850 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15851 NewVT, *Store->getMemOperand()) && 15852 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 15853 NewVT, *L->getMemOperand())) { 15854 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(), 15855 L->getPointerInfo(), L->getOriginalAlign(), 15856 L->getMemOperand()->getFlags()); 15857 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(), 15858 Store->getPointerInfo(), Store->getOriginalAlign(), 15859 Store->getMemOperand()->getFlags()); 15860 } 15861 } 15862 15863 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 15864 // vfmv.f.s is represented as extract element from 0. Match it late to avoid 15865 // any illegal types. 15866 if (Val.getOpcode() == RISCVISD::VMV_X_S || 15867 (DCI.isAfterLegalizeDAG() && 15868 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15869 isNullConstant(Val.getOperand(1)))) { 15870 SDValue Src = Val.getOperand(0); 15871 MVT VecVT = Src.getSimpleValueType(); 15872 // VecVT should be scalable and memory VT should match the element type. 15873 if (!Store->isIndexed() && VecVT.isScalableVector() && 15874 MemVT == VecVT.getVectorElementType()) { 15875 SDLoc DL(N); 15876 MVT MaskVT = getMaskTypeFor(VecVT); 15877 return DAG.getStoreVP( 15878 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), 15879 DAG.getConstant(1, DL, MaskVT), 15880 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, 15881 Store->getMemOperand(), Store->getAddressingMode(), 15882 Store->isTruncatingStore(), /*IsCompress*/ false); 15883 } 15884 } 15885 15886 break; 15887 } 15888 case ISD::SPLAT_VECTOR: { 15889 EVT VT = N->getValueType(0); 15890 // Only perform this combine on legal MVT types. 
15891 if (!isTypeLegal(VT)) 15892 break; 15893 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, 15894 DAG, Subtarget)) 15895 return Gather; 15896 break; 15897 } 15898 case ISD::BUILD_VECTOR: 15899 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) 15900 return V; 15901 break; 15902 case ISD::CONCAT_VECTORS: 15903 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) 15904 return V; 15905 break; 15906 case ISD::INSERT_VECTOR_ELT: 15907 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this)) 15908 return V; 15909 break; 15910 case RISCVISD::VFMV_V_F_VL: { 15911 const MVT VT = N->getSimpleValueType(0); 15912 SDValue Passthru = N->getOperand(0); 15913 SDValue Scalar = N->getOperand(1); 15914 SDValue VL = N->getOperand(2); 15915 15916 // If VL is 1, we can use vfmv.s.f. 15917 if (isOneConstant(VL)) 15918 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); 15919 break; 15920 } 15921 case RISCVISD::VMV_V_X_VL: { 15922 const MVT VT = N->getSimpleValueType(0); 15923 SDValue Passthru = N->getOperand(0); 15924 SDValue Scalar = N->getOperand(1); 15925 SDValue VL = N->getOperand(2); 15926 15927 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the 15928 // scalar input. 15929 unsigned ScalarSize = Scalar.getValueSizeInBits(); 15930 unsigned EltWidth = VT.getScalarSizeInBits(); 15931 if (ScalarSize > EltWidth && Passthru.isUndef()) 15932 if (SimplifyDemandedLowBitsHelper(1, EltWidth)) 15933 return SDValue(N, 0); 15934 15935 // If VL is 1 and the scalar value won't benefit from immediate, we can 15936 // use vmv.s.x. 15937 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 15938 if (isOneConstant(VL) && 15939 (!Const || Const->isZero() || 15940 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5))) 15941 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); 15942 15943 break; 15944 } 15945 case RISCVISD::VFMV_S_F_VL: { 15946 SDValue Src = N->getOperand(1); 15947 // Try to remove vector->scalar->vector if the scalar->vector is inserting 15948 // into an undef vector. 15949 // TODO: Could use a vslide or vmv.v.v for non-undef. 15950 if (N->getOperand(0).isUndef() && 15951 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15952 isNullConstant(Src.getOperand(1)) && 15953 Src.getOperand(0).getValueType().isScalableVector()) { 15954 EVT VT = N->getValueType(0); 15955 EVT SrcVT = Src.getOperand(0).getValueType(); 15956 assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); 15957 // Widths match, just return the original vector. 15958 if (SrcVT == VT) 15959 return Src.getOperand(0); 15960 // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 15961 } 15962 [[fallthrough]]; 15963 } 15964 case RISCVISD::VMV_S_X_VL: { 15965 const MVT VT = N->getSimpleValueType(0); 15966 SDValue Passthru = N->getOperand(0); 15967 SDValue Scalar = N->getOperand(1); 15968 SDValue VL = N->getOperand(2); 15969 15970 // Use M1 or smaller to avoid over constraining register allocation 15971 const MVT M1VT = getLMUL1VT(VT); 15972 if (M1VT.bitsLT(VT)) { 15973 SDValue M1Passthru = 15974 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru, 15975 DAG.getVectorIdxConstant(0, DL)); 15976 SDValue Result = 15977 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL); 15978 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result, 15979 DAG.getConstant(0, DL, XLenVT)); 15980 return Result; 15981 } 15982 15983 // We use a vmv.v.i if possible. 
We limit this to LMUL1. LMUL2 or 15984 // higher would involve overly constraining the register allocator for 15985 // no purpose. 15986 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); 15987 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) && 15988 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef()) 15989 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); 15990 15991 break; 15992 } 15993 case RISCVISD::VMV_X_S: { 15994 SDValue Vec = N->getOperand(0); 15995 MVT VecVT = N->getOperand(0).getSimpleValueType(); 15996 const MVT M1VT = getLMUL1VT(VecVT); 15997 if (M1VT.bitsLT(VecVT)) { 15998 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, 15999 DAG.getVectorIdxConstant(0, DL)); 16000 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); 16001 } 16002 break; 16003 } 16004 case ISD::INTRINSIC_VOID: 16005 case ISD::INTRINSIC_W_CHAIN: 16006 case ISD::INTRINSIC_WO_CHAIN: { 16007 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; 16008 unsigned IntNo = N->getConstantOperandVal(IntOpNo); 16009 switch (IntNo) { 16010 // By default we do not combine any intrinsic. 16011 default: 16012 return SDValue(); 16013 case Intrinsic::riscv_masked_strided_load: { 16014 MVT VT = N->getSimpleValueType(0); 16015 auto *Load = cast<MemIntrinsicSDNode>(N); 16016 SDValue PassThru = N->getOperand(2); 16017 SDValue Base = N->getOperand(3); 16018 SDValue Stride = N->getOperand(4); 16019 SDValue Mask = N->getOperand(5); 16020 16021 // If the stride is equal to the element size in bytes, we can use 16022 // a masked.load. 16023 const unsigned ElementSize = VT.getScalarStoreSize(); 16024 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); 16025 StrideC && StrideC->getZExtValue() == ElementSize) 16026 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, 16027 DAG.getUNDEF(XLenVT), Mask, PassThru, 16028 Load->getMemoryVT(), Load->getMemOperand(), 16029 ISD::UNINDEXED, ISD::NON_EXTLOAD); 16030 return SDValue(); 16031 } 16032 case Intrinsic::riscv_masked_strided_store: { 16033 auto *Store = cast<MemIntrinsicSDNode>(N); 16034 SDValue Value = N->getOperand(2); 16035 SDValue Base = N->getOperand(3); 16036 SDValue Stride = N->getOperand(4); 16037 SDValue Mask = N->getOperand(5); 16038 16039 // If the stride is equal to the element size in bytes, we can use 16040 // a masked.store. 16041 const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); 16042 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); 16043 StrideC && StrideC->getZExtValue() == ElementSize) 16044 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base, 16045 DAG.getUNDEF(XLenVT), Mask, 16046 Store->getMemoryVT(), Store->getMemOperand(), 16047 ISD::UNINDEXED, false); 16048 return SDValue(); 16049 } 16050 case Intrinsic::riscv_vcpop: 16051 case Intrinsic::riscv_vcpop_mask: 16052 case Intrinsic::riscv_vfirst: 16053 case Intrinsic::riscv_vfirst_mask: { 16054 SDValue VL = N->getOperand(2); 16055 if (IntNo == Intrinsic::riscv_vcpop_mask || 16056 IntNo == Intrinsic::riscv_vfirst_mask) 16057 VL = N->getOperand(3); 16058 if (!isNullConstant(VL)) 16059 return SDValue(); 16060 // If VL is 0, vcpop -> li 0, vfirst -> li -1. 
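      // (vcpop counts set mask bits among the first VL elements, and vfirst
      //  returns -1 when no set bit is found, so VL == 0 yields 0 and -1.)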
16061 SDLoc DL(N); 16062 EVT VT = N->getValueType(0); 16063 if (IntNo == Intrinsic::riscv_vfirst || 16064 IntNo == Intrinsic::riscv_vfirst_mask) 16065 return DAG.getConstant(-1, DL, VT); 16066 return DAG.getConstant(0, DL, VT); 16067 } 16068 } 16069 } 16070 case ISD::BITCAST: { 16071 assert(Subtarget.useRVVForFixedLengthVectors()); 16072 SDValue N0 = N->getOperand(0); 16073 EVT VT = N->getValueType(0); 16074 EVT SrcVT = N0.getValueType(); 16075 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 16076 // type, widen both sides to avoid a trip through memory. 16077 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && 16078 VT.isScalarInteger()) { 16079 unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); 16080 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); 16081 Ops[0] = N0; 16082 SDLoc DL(N); 16083 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); 16084 N0 = DAG.getBitcast(MVT::i8, N0); 16085 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); 16086 } 16087 16088 return SDValue(); 16089 } 16090 } 16091 16092 return SDValue(); 16093} 16094 16095bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( 16096 EVT XVT, unsigned KeptBits) const { 16097 // For vectors, we don't have a preference.. 16098 if (XVT.isVector()) 16099 return false; 16100 16101 if (XVT != MVT::i32 && XVT != MVT::i64) 16102 return false; 16103 16104 // We can use sext.w for RV64 or an srai 31 on RV32. 16105 if (KeptBits == 32 || KeptBits == 64) 16106 return true; 16107 16108 // With Zbb we can use sext.h/sext.b. 16109 return Subtarget.hasStdExtZbb() && 16110 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || 16111 KeptBits == 16); 16112} 16113 16114bool RISCVTargetLowering::isDesirableToCommuteWithShift( 16115 const SDNode *N, CombineLevel Level) const { 16116 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 16117 N->getOpcode() == ISD::SRL) && 16118 "Expected shift op"); 16119 16120 // The following folds are only desirable if `(OP _, c1 << c2)` can be 16121 // materialised in fewer instructions than `(OP _, c1)`: 16122 // 16123 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 16124 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 16125 SDValue N0 = N->getOperand(0); 16126 EVT Ty = N0.getValueType(); 16127 if (Ty.isScalarInteger() && 16128 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 16129 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 16130 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 16131 if (C1 && C2) { 16132 const APInt &C1Int = C1->getAPIntValue(); 16133 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 16134 16135 // We can materialise `c1 << c2` into an add immediate, so it's "free", 16136 // and the combine should happen, to potentially allow further combines 16137 // later. 16138 if (ShiftedC1Int.getSignificantBits() <= 64 && 16139 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 16140 return true; 16141 16142 // We can materialise `c1` in an add immediate, so it's "free", and the 16143 // combine should be prevented. 16144 if (C1Int.getSignificantBits() <= 64 && 16145 isLegalAddImmediate(C1Int.getSExtValue())) 16146 return false; 16147 16148 // Neither constant will fit into an immediate, so find materialisation 16149 // costs. 
16150 int C1Cost = 16151 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, 16152 /*CompressionCost*/ true); 16153 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 16154 ShiftedC1Int, Ty.getSizeInBits(), Subtarget, 16155 /*CompressionCost*/ true); 16156 16157 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 16158 // combine should be prevented. 16159 if (C1Cost < ShiftedC1Cost) 16160 return false; 16161 } 16162 } 16163 return true; 16164} 16165 16166bool RISCVTargetLowering::targetShrinkDemandedConstant( 16167 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 16168 TargetLoweringOpt &TLO) const { 16169 // Delay this optimization as late as possible. 16170 if (!TLO.LegalOps) 16171 return false; 16172 16173 EVT VT = Op.getValueType(); 16174 if (VT.isVector()) 16175 return false; 16176 16177 unsigned Opcode = Op.getOpcode(); 16178 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) 16179 return false; 16180 16181 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 16182 if (!C) 16183 return false; 16184 16185 const APInt &Mask = C->getAPIntValue(); 16186 16187 // Clear all non-demanded bits initially. 16188 APInt ShrunkMask = Mask & DemandedBits; 16189 16190 // Try to make a smaller immediate by setting undemanded bits. 16191 16192 APInt ExpandedMask = Mask | ~DemandedBits; 16193 16194 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 16195 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 16196 }; 16197 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { 16198 if (NewMask == Mask) 16199 return true; 16200 SDLoc DL(Op); 16201 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); 16202 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 16203 Op.getOperand(0), NewC); 16204 return TLO.CombineTo(Op, NewOp); 16205 }; 16206 16207 // If the shrunk mask fits in sign extended 12 bits, let the target 16208 // independent code apply it. 16209 if (ShrunkMask.isSignedIntN(12)) 16210 return false; 16211 16212 // And has a few special cases for zext. 16213 if (Opcode == ISD::AND) { 16214 // Preserve (and X, 0xffff), if zext.h exists use zext.h, 16215 // otherwise use SLLI + SRLI. 16216 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 16217 if (IsLegalMask(NewMask)) 16218 return UseMask(NewMask); 16219 16220 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 16221 if (VT == MVT::i64) { 16222 APInt NewMask = APInt(64, 0xffffffff); 16223 if (IsLegalMask(NewMask)) 16224 return UseMask(NewMask); 16225 } 16226 } 16227 16228 // For the remaining optimizations, we need to be able to make a negative 16229 // number through a combination of mask and undemanded bits. 16230 if (!ExpandedMask.isNegative()) 16231 return false; 16232 16233 // What is the fewest number of bits we need to represent the negative number. 16234 unsigned MinSignedBits = ExpandedMask.getSignificantBits(); 16235 16236 // Try to make a 12 bit negative immediate. If that fails try to make a 32 16237 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 16238 // If we can't create a simm12, we shouldn't change opaque constants. 
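  // Illustrative example (not taken from a real test): an i64 ExpandedMask of
  // 0xFFFFFFFFFFFFFF00 has 9 significant signed bits, so setting every bit
  // from bit 11 upwards keeps NewMask inside ExpandedMask while producing a
  // negative value that fits a sign-extended 12-bit immediate.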
16239 APInt NewMask = ShrunkMask; 16240 if (MinSignedBits <= 12) 16241 NewMask.setBitsFrom(11); 16242 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 16243 NewMask.setBitsFrom(31); 16244 else 16245 return false; 16246 16247 // Check that our new mask is a subset of the demanded mask. 16248 assert(IsLegalMask(NewMask)); 16249 return UseMask(NewMask); 16250} 16251 16252static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { 16253 static const uint64_t GREVMasks[] = { 16254 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 16255 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 16256 16257 for (unsigned Stage = 0; Stage != 6; ++Stage) { 16258 unsigned Shift = 1 << Stage; 16259 if (ShAmt & Shift) { 16260 uint64_t Mask = GREVMasks[Stage]; 16261 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); 16262 if (IsGORC) 16263 Res |= x; 16264 x = Res; 16265 } 16266 } 16267 16268 return x; 16269} 16270 16271void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 16272 KnownBits &Known, 16273 const APInt &DemandedElts, 16274 const SelectionDAG &DAG, 16275 unsigned Depth) const { 16276 unsigned BitWidth = Known.getBitWidth(); 16277 unsigned Opc = Op.getOpcode(); 16278 assert((Opc >= ISD::BUILTIN_OP_END || 16279 Opc == ISD::INTRINSIC_WO_CHAIN || 16280 Opc == ISD::INTRINSIC_W_CHAIN || 16281 Opc == ISD::INTRINSIC_VOID) && 16282 "Should use MaskedValueIsZero if you don't know whether Op" 16283 " is a target node!"); 16284 16285 Known.resetAll(); 16286 switch (Opc) { 16287 default: break; 16288 case RISCVISD::SELECT_CC: { 16289 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 16290 // If we don't know any bits, early out. 16291 if (Known.isUnknown()) 16292 break; 16293 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 16294 16295 // Only known if known in both the LHS and RHS. 16296 Known = Known.intersectWith(Known2); 16297 break; 16298 } 16299 case RISCVISD::CZERO_EQZ: 16300 case RISCVISD::CZERO_NEZ: 16301 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16302 // Result is either all zero or operand 0. We can propagate zeros, but not 16303 // ones. 16304 Known.One.clearAllBits(); 16305 break; 16306 case RISCVISD::REMUW: { 16307 KnownBits Known2; 16308 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16309 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16310 // We only care about the lower 32 bits. 16311 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 16312 // Restore the original width by sign extending. 16313 Known = Known.sext(BitWidth); 16314 break; 16315 } 16316 case RISCVISD::DIVUW: { 16317 KnownBits Known2; 16318 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16319 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16320 // We only care about the lower 32 bits. 16321 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 16322 // Restore the original width by sign extending. 16323 Known = Known.sext(BitWidth); 16324 break; 16325 } 16326 case RISCVISD::SLLW: { 16327 KnownBits Known2; 16328 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 16329 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 16330 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32)); 16331 // Restore the original width by sign extending. 
16332 Known = Known.sext(BitWidth); 16333 break; 16334 } 16335 case RISCVISD::CTZW: { 16336 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16337 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 16338 unsigned LowBits = llvm::bit_width(PossibleTZ); 16339 Known.Zero.setBitsFrom(LowBits); 16340 break; 16341 } 16342 case RISCVISD::CLZW: { 16343 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16344 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 16345 unsigned LowBits = llvm::bit_width(PossibleLZ); 16346 Known.Zero.setBitsFrom(LowBits); 16347 break; 16348 } 16349 case RISCVISD::BREV8: 16350 case RISCVISD::ORC_B: { 16351 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a 16352 // control value of 7 is equivalent to brev8 and orc.b. 16353 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 16354 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; 16355 // To compute zeros, we need to invert the value and invert it back after. 16356 Known.Zero = 16357 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); 16358 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); 16359 break; 16360 } 16361 case RISCVISD::READ_VLENB: { 16362 // We can use the minimum and maximum VLEN values to bound VLENB. We 16363 // know VLEN must be a power of two. 16364 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; 16365 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; 16366 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); 16367 Known.Zero.setLowBits(Log2_32(MinVLenB)); 16368 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); 16369 if (MaxVLenB == MinVLenB) 16370 Known.One.setBit(Log2_32(MinVLenB)); 16371 break; 16372 } 16373 case RISCVISD::FCLASS: { 16374 // fclass will only set one of the low 10 bits. 16375 Known.Zero.setBitsFrom(10); 16376 break; 16377 } 16378 case ISD::INTRINSIC_W_CHAIN: 16379 case ISD::INTRINSIC_WO_CHAIN: { 16380 unsigned IntNo = 16381 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); 16382 switch (IntNo) { 16383 default: 16384 // We can't do anything for most intrinsics. 16385 break; 16386 case Intrinsic::riscv_vsetvli: 16387 case Intrinsic::riscv_vsetvlimax: { 16388 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; 16389 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1); 16390 RISCVII::VLMUL VLMUL = 16391 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2)); 16392 unsigned SEW = RISCVVType::decodeVSEW(VSEW); 16393 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); 16394 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; 16395 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; 16396 16397 // Result of vsetvli must be not larger than AVL. 16398 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1))) 16399 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1)); 16400 16401 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; 16402 if (BitWidth > KnownZeroFirstBit) 16403 Known.Zero.setBitsFrom(KnownZeroFirstBit); 16404 break; 16405 } 16406 } 16407 break; 16408 } 16409 } 16410} 16411 16412unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 16413 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 16414 unsigned Depth) const { 16415 switch (Op.getOpcode()) { 16416 default: 16417 break; 16418 case RISCVISD::SELECT_CC: { 16419 unsigned Tmp = 16420 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 16421 if (Tmp == 1) return 1; // Early out. 
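    // The result is whichever of operands 3 and 4 the comparison selects, so
    // we can only guarantee the smaller of their two sign-bit counts.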
16422 unsigned Tmp2 = 16423 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 16424 return std::min(Tmp, Tmp2); 16425 } 16426 case RISCVISD::CZERO_EQZ: 16427 case RISCVISD::CZERO_NEZ: 16428 // Output is either all zero or operand 0. We can propagate sign bit count 16429 // from operand 0. 16430 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 16431 case RISCVISD::ABSW: { 16432 // We expand this at isel to negw+max. The result will have 33 sign bits 16433 // if the input has at least 33 sign bits. 16434 unsigned Tmp = 16435 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 16436 if (Tmp < 33) return 1; 16437 return 33; 16438 } 16439 case RISCVISD::SLLW: 16440 case RISCVISD::SRAW: 16441 case RISCVISD::SRLW: 16442 case RISCVISD::DIVW: 16443 case RISCVISD::DIVUW: 16444 case RISCVISD::REMUW: 16445 case RISCVISD::ROLW: 16446 case RISCVISD::RORW: 16447 case RISCVISD::FCVT_W_RV64: 16448 case RISCVISD::FCVT_WU_RV64: 16449 case RISCVISD::STRICT_FCVT_W_RV64: 16450 case RISCVISD::STRICT_FCVT_WU_RV64: 16451 // TODO: As the result is sign-extended, this is conservatively correct. A 16452 // more precise answer could be calculated for SRAW depending on known 16453 // bits in the shift amount. 16454 return 33; 16455 case RISCVISD::VMV_X_S: { 16456 // The number of sign bits of the scalar result is computed by obtaining the 16457 // element type of the input vector operand, subtracting its width from the 16458 // XLEN, and then adding one (sign bit within the element type). If the 16459 // element type is wider than XLen, the least-significant XLEN bits are 16460 // taken. 16461 unsigned XLen = Subtarget.getXLen(); 16462 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); 16463 if (EltBits <= XLen) 16464 return XLen - EltBits + 1; 16465 break; 16466 } 16467 case ISD::INTRINSIC_W_CHAIN: { 16468 unsigned IntNo = Op.getConstantOperandVal(1); 16469 switch (IntNo) { 16470 default: 16471 break; 16472 case Intrinsic::riscv_masked_atomicrmw_xchg_i64: 16473 case Intrinsic::riscv_masked_atomicrmw_add_i64: 16474 case Intrinsic::riscv_masked_atomicrmw_sub_i64: 16475 case Intrinsic::riscv_masked_atomicrmw_nand_i64: 16476 case Intrinsic::riscv_masked_atomicrmw_max_i64: 16477 case Intrinsic::riscv_masked_atomicrmw_min_i64: 16478 case Intrinsic::riscv_masked_atomicrmw_umax_i64: 16479 case Intrinsic::riscv_masked_atomicrmw_umin_i64: 16480 case Intrinsic::riscv_masked_cmpxchg_i64: 16481 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated 16482 // narrow atomic operation. These are implemented using atomic 16483 // operations at the minimum supported atomicrmw/cmpxchg width whose 16484 // result is then sign extended to XLEN. With +A, the minimum width is 16485 // 32 for both 64 and 32. 16486 assert(Subtarget.getXLen() == 64); 16487 assert(getMinCmpXchgSizeInBits() == 32); 16488 assert(Subtarget.hasStdExtA()); 16489 return 33; 16490 } 16491 break; 16492 } 16493 } 16494 16495 return 1; 16496} 16497 16498const Constant * 16499RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { 16500 assert(Ld && "Unexpected null LoadSDNode"); 16501 if (!ISD::isNormalLoad(Ld)) 16502 return nullptr; 16503 16504 SDValue Ptr = Ld->getBasePtr(); 16505 16506 // Only constant pools with no offset are supported. 
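  // (The pool entry may be reached either through an LLA pseudo or through a
  // %hi/%lo pair (RISCVISD::HI + RISCVISD::ADD_LO); both forms are matched
  // below.)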
16507 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { 16508 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); 16509 if (!CNode || CNode->isMachineConstantPoolEntry() || 16510 CNode->getOffset() != 0) 16511 return nullptr; 16512 16513 return CNode; 16514 }; 16515 16516 // Simple case, LLA. 16517 if (Ptr.getOpcode() == RISCVISD::LLA) { 16518 auto *CNode = GetSupportedConstantPool(Ptr); 16519 if (!CNode || CNode->getTargetFlags() != 0) 16520 return nullptr; 16521 16522 return CNode->getConstVal(); 16523 } 16524 16525 // Look for a HI and ADD_LO pair. 16526 if (Ptr.getOpcode() != RISCVISD::ADD_LO || 16527 Ptr.getOperand(0).getOpcode() != RISCVISD::HI) 16528 return nullptr; 16529 16530 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); 16531 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); 16532 16533 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || 16534 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) 16535 return nullptr; 16536 16537 if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) 16538 return nullptr; 16539 16540 return CNodeLo->getConstVal(); 16541} 16542 16543static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 16544 MachineBasicBlock *BB) { 16545 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 16546 16547 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 16548 // Should the count have wrapped while it was being read, we need to try 16549 // again. 16550 // ... 16551 // read: 16552 // rdcycleh x3 # load high word of cycle 16553 // rdcycle x2 # load low word of cycle 16554 // rdcycleh x4 # load high word of cycle 16555 // bne x3, x4, read # check if high word reads match, otherwise try again 16556 // ... 16557 16558 MachineFunction &MF = *BB->getParent(); 16559 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 16560 MachineFunction::iterator It = ++BB->getIterator(); 16561 16562 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 16563 MF.insert(It, LoopMBB); 16564 16565 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 16566 MF.insert(It, DoneMBB); 16567 16568 // Transfer the remainder of BB and its successor edges to DoneMBB. 
16569 DoneMBB->splice(DoneMBB->begin(), BB, 16570 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 16571 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 16572 16573 BB->addSuccessor(LoopMBB); 16574 16575 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 16576 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 16577 Register LoReg = MI.getOperand(0).getReg(); 16578 Register HiReg = MI.getOperand(1).getReg(); 16579 DebugLoc DL = MI.getDebugLoc(); 16580 16581 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 16582 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 16583 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 16584 .addReg(RISCV::X0); 16585 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 16586 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 16587 .addReg(RISCV::X0); 16588 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 16589 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 16590 .addReg(RISCV::X0); 16591 16592 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 16593 .addReg(HiReg) 16594 .addReg(ReadAgainReg) 16595 .addMBB(LoopMBB); 16596 16597 LoopMBB->addSuccessor(LoopMBB); 16598 LoopMBB->addSuccessor(DoneMBB); 16599 16600 MI.eraseFromParent(); 16601 16602 return DoneMBB; 16603} 16604 16605static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 16606 MachineBasicBlock *BB, 16607 const RISCVSubtarget &Subtarget) { 16608 assert((MI.getOpcode() == RISCV::SplitF64Pseudo || 16609 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) && 16610 "Unexpected instruction"); 16611 16612 MachineFunction &MF = *BB->getParent(); 16613 DebugLoc DL = MI.getDebugLoc(); 16614 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 16615 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 16616 Register LoReg = MI.getOperand(0).getReg(); 16617 Register HiReg = MI.getOperand(1).getReg(); 16618 Register SrcReg = MI.getOperand(2).getReg(); 16619 16620 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX 16621 ? &RISCV::GPRPairRegClass 16622 : &RISCV::FPR64RegClass; 16623 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 16624 16625 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 16626 RI, Register()); 16627 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 16628 MachineMemOperand *MMOLo = 16629 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 16630 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 16631 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 16632 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 16633 .addFrameIndex(FI) 16634 .addImm(0) 16635 .addMemOperand(MMOLo); 16636 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 16637 .addFrameIndex(FI) 16638 .addImm(4) 16639 .addMemOperand(MMOHi); 16640 MI.eraseFromParent(); // The pseudo instruction is gone now. 
16641 return BB; 16642} 16643 16644static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 16645 MachineBasicBlock *BB, 16646 const RISCVSubtarget &Subtarget) { 16647 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo || 16648 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) && 16649 "Unexpected instruction"); 16650 16651 MachineFunction &MF = *BB->getParent(); 16652 DebugLoc DL = MI.getDebugLoc(); 16653 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 16654 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 16655 Register DstReg = MI.getOperand(0).getReg(); 16656 Register LoReg = MI.getOperand(1).getReg(); 16657 Register HiReg = MI.getOperand(2).getReg(); 16658 16659 const TargetRegisterClass *DstRC = 16660 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass 16661 : &RISCV::FPR64RegClass; 16662 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 16663 16664 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 16665 MachineMemOperand *MMOLo = 16666 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 16667 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 16668 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 16669 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 16670 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 16671 .addFrameIndex(FI) 16672 .addImm(0) 16673 .addMemOperand(MMOLo); 16674 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 16675 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 16676 .addFrameIndex(FI) 16677 .addImm(4) 16678 .addMemOperand(MMOHi); 16679 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); 16680 MI.eraseFromParent(); // The pseudo instruction is gone now. 16681 return BB; 16682} 16683 16684static bool isSelectPseudo(MachineInstr &MI) { 16685 switch (MI.getOpcode()) { 16686 default: 16687 return false; 16688 case RISCV::Select_GPR_Using_CC_GPR: 16689 case RISCV::Select_FPR16_Using_CC_GPR: 16690 case RISCV::Select_FPR16INX_Using_CC_GPR: 16691 case RISCV::Select_FPR32_Using_CC_GPR: 16692 case RISCV::Select_FPR32INX_Using_CC_GPR: 16693 case RISCV::Select_FPR64_Using_CC_GPR: 16694 case RISCV::Select_FPR64INX_Using_CC_GPR: 16695 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 16696 return true; 16697 } 16698} 16699 16700static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, 16701 unsigned RelOpcode, unsigned EqOpcode, 16702 const RISCVSubtarget &Subtarget) { 16703 DebugLoc DL = MI.getDebugLoc(); 16704 Register DstReg = MI.getOperand(0).getReg(); 16705 Register Src1Reg = MI.getOperand(1).getReg(); 16706 Register Src2Reg = MI.getOperand(2).getReg(); 16707 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 16708 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); 16709 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 16710 16711 // Save the current FFLAGS. 16712 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); 16713 16714 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) 16715 .addReg(Src1Reg) 16716 .addReg(Src2Reg); 16717 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 16718 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 16719 16720 // Restore the FFLAGS. 16721 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 16722 .addReg(SavedFFlags, RegState::Kill); 16723 16724 // Issue a dummy FEQ opcode to raise exception for signaling NaNs. 
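  // (FEQ is a quiet comparison in the F/D extensions: it raises the
  // invalid-operation flag only for signaling NaN operands, which is exactly
  // the behaviour required of the quiet FLE/FLT pseudos.)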
16725 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) 16726 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) 16727 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); 16728 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 16729 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); 16730 16731 // Erase the pseudoinstruction. 16732 MI.eraseFromParent(); 16733 return BB; 16734} 16735 16736static MachineBasicBlock * 16737EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, 16738 MachineBasicBlock *ThisMBB, 16739 const RISCVSubtarget &Subtarget) { 16740 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) 16741 // Without this, custom-inserter would have generated: 16742 // 16743 // A 16744 // | \ 16745 // | B 16746 // | / 16747 // C 16748 // | \ 16749 // | D 16750 // | / 16751 // E 16752 // 16753 // A: X = ...; Y = ... 16754 // B: empty 16755 // C: Z = PHI [X, A], [Y, B] 16756 // D: empty 16757 // E: PHI [X, C], [Z, D] 16758 // 16759 // If we lower both Select_FPRX_ in a single step, we can instead generate: 16760 // 16761 // A 16762 // | \ 16763 // | C 16764 // | /| 16765 // |/ | 16766 // | | 16767 // | D 16768 // | / 16769 // E 16770 // 16771 // A: X = ...; Y = ... 16772 // D: empty 16773 // E: PHI [X, A], [X, C], [Y, D] 16774 16775 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 16776 const DebugLoc &DL = First.getDebugLoc(); 16777 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 16778 MachineFunction *F = ThisMBB->getParent(); 16779 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB); 16780 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB); 16781 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 16782 MachineFunction::iterator It = ++ThisMBB->getIterator(); 16783 F->insert(It, FirstMBB); 16784 F->insert(It, SecondMBB); 16785 F->insert(It, SinkMBB); 16786 16787 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. 16788 SinkMBB->splice(SinkMBB->begin(), ThisMBB, 16789 std::next(MachineBasicBlock::iterator(First)), 16790 ThisMBB->end()); 16791 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); 16792 16793 // Fallthrough block for ThisMBB. 16794 ThisMBB->addSuccessor(FirstMBB); 16795 // Fallthrough block for FirstMBB. 16796 FirstMBB->addSuccessor(SecondMBB); 16797 ThisMBB->addSuccessor(SinkMBB); 16798 FirstMBB->addSuccessor(SinkMBB); 16799 // This is fallthrough. 16800 SecondMBB->addSuccessor(SinkMBB); 16801 16802 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm()); 16803 Register FLHS = First.getOperand(1).getReg(); 16804 Register FRHS = First.getOperand(2).getReg(); 16805 // Insert appropriate branch. 16806 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) 16807 .addReg(FLHS) 16808 .addReg(FRHS) 16809 .addMBB(SinkMBB); 16810 16811 Register SLHS = Second.getOperand(1).getReg(); 16812 Register SRHS = Second.getOperand(2).getReg(); 16813 Register Op1Reg4 = First.getOperand(4).getReg(); 16814 Register Op1Reg5 = First.getOperand(5).getReg(); 16815 16816 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); 16817 // Insert appropriate branch. 
16818 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) 16819 .addReg(SLHS) 16820 .addReg(SRHS) 16821 .addMBB(SinkMBB); 16822 16823 Register DestReg = Second.getOperand(0).getReg(); 16824 Register Op2Reg4 = Second.getOperand(4).getReg(); 16825 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) 16826 .addReg(Op2Reg4) 16827 .addMBB(ThisMBB) 16828 .addReg(Op1Reg4) 16829 .addMBB(FirstMBB) 16830 .addReg(Op1Reg5) 16831 .addMBB(SecondMBB); 16832 16833 // Now remove the Select_FPRX_s. 16834 First.eraseFromParent(); 16835 Second.eraseFromParent(); 16836 return SinkMBB; 16837} 16838 16839static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 16840 MachineBasicBlock *BB, 16841 const RISCVSubtarget &Subtarget) { 16842 // To "insert" Select_* instructions, we actually have to insert the triangle 16843 // control-flow pattern. The incoming instructions know the destination vreg 16844 // to set, the condition code register to branch on, the true/false values to 16845 // select between, and the condcode to use to select the appropriate branch. 16846 // 16847 // We produce the following control flow: 16848 // HeadMBB 16849 // | \ 16850 // | IfFalseMBB 16851 // | / 16852 // TailMBB 16853 // 16854 // When we find a sequence of selects we attempt to optimize their emission 16855 // by sharing the control flow. Currently we only handle cases where we have 16856 // multiple selects with the exact same condition (same LHS, RHS and CC). 16857 // The selects may be interleaved with other instructions if the other 16858 // instructions meet some requirements we deem safe: 16859 // - They are not pseudo instructions. 16860 // - They are debug instructions. Otherwise, 16861 // - They do not have side-effects, do not access memory and their inputs do 16862 // not depend on the results of the select pseudo-instructions. 16863 // The TrueV/FalseV operands of the selects cannot depend on the result of 16864 // previous selects in the sequence. 16865 // These conditions could be further relaxed. See the X86 target for a 16866 // related approach and more information. 16867 // 16868 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) 16869 // is checked here and handled by a separate function - 16870 // EmitLoweredCascadedSelect. 
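  // Roughly, two selects guarded by the same (LHS, RHS, CC), e.g.
  //   %a = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %t1, %f1
  // end up sharing one conditional branch and become two PHIs in TailMBB:
  //   %a = phi [ %t0, HeadMBB ], [ %f0, IfFalseMBB ]
  //   %b = phi [ %t1, HeadMBB ], [ %f1, IfFalseMBB ]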
16871 Register LHS = MI.getOperand(1).getReg(); 16872 Register RHS = MI.getOperand(2).getReg(); 16873 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 16874 16875 SmallVector<MachineInstr *, 4> SelectDebugValues; 16876 SmallSet<Register, 4> SelectDests; 16877 SelectDests.insert(MI.getOperand(0).getReg()); 16878 16879 MachineInstr *LastSelectPseudo = &MI; 16880 auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); 16881 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && 16882 Next->getOpcode() == MI.getOpcode() && 16883 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && 16884 Next->getOperand(5).isKill()) { 16885 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); 16886 } 16887 16888 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 16889 SequenceMBBI != E; ++SequenceMBBI) { 16890 if (SequenceMBBI->isDebugInstr()) 16891 continue; 16892 if (isSelectPseudo(*SequenceMBBI)) { 16893 if (SequenceMBBI->getOperand(1).getReg() != LHS || 16894 SequenceMBBI->getOperand(2).getReg() != RHS || 16895 SequenceMBBI->getOperand(3).getImm() != CC || 16896 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 16897 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 16898 break; 16899 LastSelectPseudo = &*SequenceMBBI; 16900 SequenceMBBI->collectDebugValues(SelectDebugValues); 16901 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 16902 continue; 16903 } 16904 if (SequenceMBBI->hasUnmodeledSideEffects() || 16905 SequenceMBBI->mayLoadOrStore() || 16906 SequenceMBBI->usesCustomInsertionHook()) 16907 break; 16908 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 16909 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 16910 })) 16911 break; 16912 } 16913 16914 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 16915 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 16916 DebugLoc DL = MI.getDebugLoc(); 16917 MachineFunction::iterator I = ++BB->getIterator(); 16918 16919 MachineBasicBlock *HeadMBB = BB; 16920 MachineFunction *F = BB->getParent(); 16921 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 16922 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 16923 16924 F->insert(I, IfFalseMBB); 16925 F->insert(I, TailMBB); 16926 16927 // Transfer debug instructions associated with the selects to TailMBB. 16928 for (MachineInstr *DebugInstr : SelectDebugValues) { 16929 TailMBB->push_back(DebugInstr->removeFromParent()); 16930 } 16931 16932 // Move all instructions after the sequence to TailMBB. 16933 TailMBB->splice(TailMBB->end(), HeadMBB, 16934 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 16935 // Update machine-CFG edges by transferring all successors of the current 16936 // block to the new block which will contain the Phi nodes for the selects. 16937 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 16938 // Set the successors for HeadMBB. 16939 HeadMBB->addSuccessor(IfFalseMBB); 16940 HeadMBB->addSuccessor(TailMBB); 16941 16942 // Insert appropriate branch. 16943 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 16944 .addReg(LHS) 16945 .addReg(RHS) 16946 .addMBB(TailMBB); 16947 16948 // IfFalseMBB just falls through to TailMBB. 16949 IfFalseMBB->addSuccessor(TailMBB); 16950 16951 // Create PHIs for all of the select pseudo-instructions. 
16952 auto SelectMBBI = MI.getIterator(); 16953 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 16954 auto InsertionPoint = TailMBB->begin(); 16955 while (SelectMBBI != SelectEnd) { 16956 auto Next = std::next(SelectMBBI); 16957 if (isSelectPseudo(*SelectMBBI)) { 16958 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 16959 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 16960 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 16961 .addReg(SelectMBBI->getOperand(4).getReg()) 16962 .addMBB(HeadMBB) 16963 .addReg(SelectMBBI->getOperand(5).getReg()) 16964 .addMBB(IfFalseMBB); 16965 SelectMBBI->eraseFromParent(); 16966 } 16967 SelectMBBI = Next; 16968 } 16969 16970 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 16971 return TailMBB; 16972} 16973 16974static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, 16975 MachineBasicBlock *BB, 16976 unsigned CVTXOpc, 16977 unsigned CVTFOpc) { 16978 DebugLoc DL = MI.getDebugLoc(); 16979 16980 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 16981 16982 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 16983 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); 16984 16985 // Save the old value of FFLAGS. 16986 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); 16987 16988 assert(MI.getNumOperands() == 7); 16989 16990 // Emit a VFCVT_X_F 16991 const TargetRegisterInfo *TRI = 16992 BB->getParent()->getSubtarget().getRegisterInfo(); 16993 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); 16994 Register Tmp = MRI.createVirtualRegister(RC); 16995 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) 16996 .add(MI.getOperand(1)) 16997 .add(MI.getOperand(2)) 16998 .add(MI.getOperand(3)) 16999 .add(MachineOperand::CreateImm(7)) // frm = DYN 17000 .add(MI.getOperand(4)) 17001 .add(MI.getOperand(5)) 17002 .add(MI.getOperand(6)) 17003 .add(MachineOperand::CreateReg(RISCV::FRM, 17004 /*IsDef*/ false, 17005 /*IsImp*/ true)); 17006 17007 // Emit a VFCVT_F_X 17008 BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) 17009 .add(MI.getOperand(0)) 17010 .add(MI.getOperand(1)) 17011 .addReg(Tmp) 17012 .add(MI.getOperand(3)) 17013 .add(MachineOperand::CreateImm(7)) // frm = DYN 17014 .add(MI.getOperand(4)) 17015 .add(MI.getOperand(5)) 17016 .add(MI.getOperand(6)) 17017 .add(MachineOperand::CreateReg(RISCV::FRM, 17018 /*IsDef*/ false, 17019 /*IsImp*/ true)); 17020 17021 // Restore FFLAGS. 17022 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 17023 .addReg(SavedFFLAGS, RegState::Kill); 17024 17025 // Erase the pseudoinstruction. 
17026 MI.eraseFromParent(); 17027 return BB; 17028} 17029 17030static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, 17031 const RISCVSubtarget &Subtarget) { 17032 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; 17033 const TargetRegisterClass *RC; 17034 switch (MI.getOpcode()) { 17035 default: 17036 llvm_unreachable("Unexpected opcode"); 17037 case RISCV::PseudoFROUND_H: 17038 CmpOpc = RISCV::FLT_H; 17039 F2IOpc = RISCV::FCVT_W_H; 17040 I2FOpc = RISCV::FCVT_H_W; 17041 FSGNJOpc = RISCV::FSGNJ_H; 17042 FSGNJXOpc = RISCV::FSGNJX_H; 17043 RC = &RISCV::FPR16RegClass; 17044 break; 17045 case RISCV::PseudoFROUND_H_INX: 17046 CmpOpc = RISCV::FLT_H_INX; 17047 F2IOpc = RISCV::FCVT_W_H_INX; 17048 I2FOpc = RISCV::FCVT_H_W_INX; 17049 FSGNJOpc = RISCV::FSGNJ_H_INX; 17050 FSGNJXOpc = RISCV::FSGNJX_H_INX; 17051 RC = &RISCV::GPRF16RegClass; 17052 break; 17053 case RISCV::PseudoFROUND_S: 17054 CmpOpc = RISCV::FLT_S; 17055 F2IOpc = RISCV::FCVT_W_S; 17056 I2FOpc = RISCV::FCVT_S_W; 17057 FSGNJOpc = RISCV::FSGNJ_S; 17058 FSGNJXOpc = RISCV::FSGNJX_S; 17059 RC = &RISCV::FPR32RegClass; 17060 break; 17061 case RISCV::PseudoFROUND_S_INX: 17062 CmpOpc = RISCV::FLT_S_INX; 17063 F2IOpc = RISCV::FCVT_W_S_INX; 17064 I2FOpc = RISCV::FCVT_S_W_INX; 17065 FSGNJOpc = RISCV::FSGNJ_S_INX; 17066 FSGNJXOpc = RISCV::FSGNJX_S_INX; 17067 RC = &RISCV::GPRF32RegClass; 17068 break; 17069 case RISCV::PseudoFROUND_D: 17070 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 17071 CmpOpc = RISCV::FLT_D; 17072 F2IOpc = RISCV::FCVT_L_D; 17073 I2FOpc = RISCV::FCVT_D_L; 17074 FSGNJOpc = RISCV::FSGNJ_D; 17075 FSGNJXOpc = RISCV::FSGNJX_D; 17076 RC = &RISCV::FPR64RegClass; 17077 break; 17078 case RISCV::PseudoFROUND_D_INX: 17079 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 17080 CmpOpc = RISCV::FLT_D_INX; 17081 F2IOpc = RISCV::FCVT_L_D_INX; 17082 I2FOpc = RISCV::FCVT_D_L_INX; 17083 FSGNJOpc = RISCV::FSGNJ_D_INX; 17084 FSGNJXOpc = RISCV::FSGNJX_D_INX; 17085 RC = &RISCV::GPRRegClass; 17086 break; 17087 } 17088 17089 const BasicBlock *BB = MBB->getBasicBlock(); 17090 DebugLoc DL = MI.getDebugLoc(); 17091 MachineFunction::iterator I = ++MBB->getIterator(); 17092 17093 MachineFunction *F = MBB->getParent(); 17094 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); 17095 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); 17096 17097 F->insert(I, CvtMBB); 17098 F->insert(I, DoneMBB); 17099 // Move all instructions after the sequence to DoneMBB. 17100 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), 17101 MBB->end()); 17102 // Update machine-CFG edges by transferring all successors of the current 17103 // block to the new block which will contain the Phi nodes for the selects. 17104 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 17105 // Set the successors for MBB. 17106 MBB->addSuccessor(CvtMBB); 17107 MBB->addSuccessor(DoneMBB); 17108 17109 Register DstReg = MI.getOperand(0).getReg(); 17110 Register SrcReg = MI.getOperand(1).getReg(); 17111 Register MaxReg = MI.getOperand(2).getReg(); 17112 int64_t FRM = MI.getOperand(3).getImm(); 17113 17114 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 17115 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 17116 17117 Register FabsReg = MRI.createVirtualRegister(RC); 17118 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); 17119 17120 // Compare the FP value to the max value. 
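  // If |SrcReg| is not strictly less than MaxReg (this also covers NaN inputs,
  // for which FLT produces 0), the value is already integral or is NaN/inf, so
  // the branch below skips CvtMBB and the PHI in DoneMBB forwards SrcReg
  // unchanged.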
17121 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 17122 auto MIB = 17123 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); 17124 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17125 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17126 17127 // Insert branch. 17128 BuildMI(MBB, DL, TII.get(RISCV::BEQ)) 17129 .addReg(CmpReg) 17130 .addReg(RISCV::X0) 17131 .addMBB(DoneMBB); 17132 17133 CvtMBB->addSuccessor(DoneMBB); 17134 17135 // Convert to integer. 17136 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 17137 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); 17138 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17139 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17140 17141 // Convert back to FP. 17142 Register I2FReg = MRI.createVirtualRegister(RC); 17143 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); 17144 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 17145 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 17146 17147 // Restore the sign bit. 17148 Register CvtReg = MRI.createVirtualRegister(RC); 17149 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); 17150 17151 // Merge the results. 17152 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) 17153 .addReg(SrcReg) 17154 .addMBB(MBB) 17155 .addReg(CvtReg) 17156 .addMBB(CvtMBB); 17157 17158 MI.eraseFromParent(); 17159 return DoneMBB; 17160} 17161 17162MachineBasicBlock * 17163RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 17164 MachineBasicBlock *BB) const { 17165 switch (MI.getOpcode()) { 17166 default: 17167 llvm_unreachable("Unexpected instr type to insert"); 17168 case RISCV::ReadCycleWide: 17169 assert(!Subtarget.is64Bit() && 17170 "ReadCycleWrite is only to be used on riscv32"); 17171 return emitReadCycleWidePseudo(MI, BB); 17172 case RISCV::Select_GPR_Using_CC_GPR: 17173 case RISCV::Select_FPR16_Using_CC_GPR: 17174 case RISCV::Select_FPR16INX_Using_CC_GPR: 17175 case RISCV::Select_FPR32_Using_CC_GPR: 17176 case RISCV::Select_FPR32INX_Using_CC_GPR: 17177 case RISCV::Select_FPR64_Using_CC_GPR: 17178 case RISCV::Select_FPR64INX_Using_CC_GPR: 17179 case RISCV::Select_FPR64IN32X_Using_CC_GPR: 17180 return emitSelectPseudo(MI, BB, Subtarget); 17181 case RISCV::BuildPairF64Pseudo: 17182 case RISCV::BuildPairF64Pseudo_INX: 17183 return emitBuildPairF64Pseudo(MI, BB, Subtarget); 17184 case RISCV::SplitF64Pseudo: 17185 case RISCV::SplitF64Pseudo_INX: 17186 return emitSplitF64Pseudo(MI, BB, Subtarget); 17187 case RISCV::PseudoQuietFLE_H: 17188 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); 17189 case RISCV::PseudoQuietFLE_H_INX: 17190 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); 17191 case RISCV::PseudoQuietFLT_H: 17192 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); 17193 case RISCV::PseudoQuietFLT_H_INX: 17194 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); 17195 case RISCV::PseudoQuietFLE_S: 17196 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); 17197 case RISCV::PseudoQuietFLE_S_INX: 17198 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); 17199 case RISCV::PseudoQuietFLT_S: 17200 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); 17201 case RISCV::PseudoQuietFLT_S_INX: 17202 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); 17203 case 
RISCV::PseudoQuietFLE_D: 17204 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); 17205 case RISCV::PseudoQuietFLE_D_INX: 17206 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); 17207 case RISCV::PseudoQuietFLE_D_IN32X: 17208 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, 17209 Subtarget); 17210 case RISCV::PseudoQuietFLT_D: 17211 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); 17212 case RISCV::PseudoQuietFLT_D_INX: 17213 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); 17214 case RISCV::PseudoQuietFLT_D_IN32X: 17215 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, 17216 Subtarget); 17217 17218 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: 17219 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, 17220 RISCV::PseudoVFCVT_F_X_V_M1_MASK); 17221 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: 17222 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK, 17223 RISCV::PseudoVFCVT_F_X_V_M2_MASK); 17224 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: 17225 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK, 17226 RISCV::PseudoVFCVT_F_X_V_M4_MASK); 17227 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: 17228 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK, 17229 RISCV::PseudoVFCVT_F_X_V_M8_MASK); 17230 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: 17231 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK, 17232 RISCV::PseudoVFCVT_F_X_V_MF2_MASK); 17233 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: 17234 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK, 17235 RISCV::PseudoVFCVT_F_X_V_MF4_MASK); 17236 case RISCV::PseudoFROUND_H: 17237 case RISCV::PseudoFROUND_H_INX: 17238 case RISCV::PseudoFROUND_S: 17239 case RISCV::PseudoFROUND_S_INX: 17240 case RISCV::PseudoFROUND_D: 17241 case RISCV::PseudoFROUND_D_INX: 17242 case RISCV::PseudoFROUND_D_IN32X: 17243 return emitFROUND(MI, BB, Subtarget); 17244 case TargetOpcode::STATEPOINT: 17245 case TargetOpcode::STACKMAP: 17246 case TargetOpcode::PATCHPOINT: 17247 if (!Subtarget.is64Bit()) 17248 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " 17249 "supported on 64-bit targets"); 17250 return emitPatchPoint(MI, BB); 17251 } 17252} 17253 17254void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, 17255 SDNode *Node) const { 17256 // Add FRM dependency to any instructions with dynamic rounding mode. 17257 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); 17258 if (Idx < 0) { 17259 // Vector pseudos have FRM index indicated by TSFlags. 17260 Idx = RISCVII::getFRMOpNum(MI.getDesc()); 17261 if (Idx < 0) 17262 return; 17263 } 17264 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) 17265 return; 17266 // If the instruction already reads FRM, don't add another read. 17267 if (MI.readsRegister(RISCV::FRM)) 17268 return; 17269 MI.addOperand( 17270 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); 17271} 17272 17273// Calling Convention Implementation. 17274// The expectations for frontend ABI lowering vary from target to target. 17275// Ideally, an LLVM frontend would be able to avoid worrying about many ABI 17276// details, but this is a longer term goal. For now, we simply try to keep the 17277// role of the frontend as simple and well-defined as possible. 
The rules can 17278// be summarised as: 17279// * Never split up large scalar arguments. We handle them here. 17280// * If a hardfloat calling convention is being used, and the struct may be 17281// passed in a pair of registers (fp+fp, int+fp), and both registers are 17282// available, then pass as two separate arguments. If either the GPRs or FPRs 17283// are exhausted, then pass according to the rule below. 17284// * If a struct could never be passed in registers or directly in a stack 17285// slot (as it is larger than 2*XLEN and the floating point rules don't 17286// apply), then pass it using a pointer with the byval attribute. 17287// * If a struct is less than 2*XLEN, then coerce to either a two-element 17288// word-sized array or a 2*XLEN scalar (depending on alignment). 17289// * The frontend can determine whether a struct is returned by reference or 17290// not based on its size and fields. If it will be returned by reference, the 17291// frontend must modify the prototype so a pointer with the sret annotation is 17292// passed as the first argument. This is not necessary for large scalar 17293// returns. 17294// * Struct return values and varargs should be coerced to structs containing 17295// register-size fields in the same situations they would be for fixed 17296// arguments. 17297 17298static const MCPhysReg ArgFPR16s[] = { 17299 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 17300 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 17301}; 17302static const MCPhysReg ArgFPR32s[] = { 17303 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 17304 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 17305}; 17306static const MCPhysReg ArgFPR64s[] = { 17307 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 17308 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 17309}; 17310// This is an interim calling convention and it may be changed in the future. 17311static const MCPhysReg ArgVRs[] = { 17312 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 17313 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 17314 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 17315static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 17316 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 17317 RISCV::V20M2, RISCV::V22M2}; 17318static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 17319 RISCV::V20M4}; 17320static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 17321 17322ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { 17323 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except 17324 // the ILP32E ABI. 17325 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17326 RISCV::X13, RISCV::X14, RISCV::X15, 17327 RISCV::X16, RISCV::X17}; 17328 // The GPRs used for passing arguments in the ILP32E/ILP64E ABI. 17329 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17330 RISCV::X13, RISCV::X14, RISCV::X15}; 17331 17332 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 17333 return ArrayRef(ArgEGPRs); 17334 17335 return ArrayRef(ArgIGPRs); 17336} 17337 17338static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { 17339 // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used 17340 // for save-restore libcall, so we don't use them. 
17341 static const MCPhysReg FastCCIGPRs[] = { 17342 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 17343 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 17344 RISCV::X29, RISCV::X30, RISCV::X31}; 17345 17346 // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. 17347 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, 17348 RISCV::X13, RISCV::X14, RISCV::X15, 17349 RISCV::X7}; 17350 17351 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) 17352 return ArrayRef(FastCCEGPRs); 17353 17354 return ArrayRef(FastCCIGPRs); 17355} 17356 17357// Pass a 2*XLEN argument that has been split into two XLEN values through 17358// registers or the stack as necessary. 17359static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 17360 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 17361 MVT ValVT2, MVT LocVT2, 17362 ISD::ArgFlagsTy ArgFlags2, bool EABI) { 17363 unsigned XLenInBytes = XLen / 8; 17364 const RISCVSubtarget &STI = 17365 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 17366 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); 17367 17368 if (Register Reg = State.AllocateReg(ArgGPRs)) { 17369 // At least one half can be passed via register. 17370 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 17371 VA1.getLocVT(), CCValAssign::Full)); 17372 } else { 17373 // Both halves must be passed on the stack, with proper alignment. 17374 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte 17375 // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 17376 Align StackAlign(XLenInBytes); 17377 if (!EABI || XLen != 32) 17378 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); 17379 State.addLoc( 17380 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 17381 State.AllocateStack(XLenInBytes, StackAlign), 17382 VA1.getLocVT(), CCValAssign::Full)); 17383 State.addLoc(CCValAssign::getMem( 17384 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 17385 LocVT2, CCValAssign::Full)); 17386 return false; 17387 } 17388 17389 if (Register Reg = State.AllocateReg(ArgGPRs)) { 17390 // The second half can also be passed via register. 17391 State.addLoc( 17392 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 17393 } else { 17394 // The second half is passed via the stack, without additional alignment. 17395 State.addLoc(CCValAssign::getMem( 17396 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 17397 LocVT2, CCValAssign::Full)); 17398 } 17399 17400 return false; 17401} 17402 17403static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, 17404 std::optional<unsigned> FirstMaskArgument, 17405 CCState &State, const RISCVTargetLowering &TLI) { 17406 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 17407 if (RC == &RISCV::VRRegClass) { 17408 // Assign the first mask argument to V0. 17409 // This is an interim calling convention and it may be changed in the 17410 // future. 
17411 if (FirstMaskArgument && ValNo == *FirstMaskArgument) 17412 return State.AllocateReg(RISCV::V0); 17413 return State.AllocateReg(ArgVRs); 17414 } 17415 if (RC == &RISCV::VRM2RegClass) 17416 return State.AllocateReg(ArgVRM2s); 17417 if (RC == &RISCV::VRM4RegClass) 17418 return State.AllocateReg(ArgVRM4s); 17419 if (RC == &RISCV::VRM8RegClass) 17420 return State.AllocateReg(ArgVRM8s); 17421 llvm_unreachable("Unhandled register class for ValueType"); 17422} 17423 17424// Implements the RISC-V calling convention. Returns true upon failure. 17425bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 17426 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 17427 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 17428 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 17429 std::optional<unsigned> FirstMaskArgument) { 17430 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 17431 assert(XLen == 32 || XLen == 64); 17432 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 17433 17434 // Static chain parameter must not be passed in normal argument registers, 17435 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain 17436 if (ArgFlags.isNest()) { 17437 if (unsigned Reg = State.AllocateReg(RISCV::X7)) { 17438 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17439 return false; 17440 } 17441 } 17442 17443 // Any return value split in to more than two values can't be returned 17444 // directly. Vectors are returned via the available vector registers. 17445 if (!LocVT.isVector() && IsRet && ValNo > 1) 17446 return true; 17447 17448 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 17449 // variadic argument, or if no F16/F32 argument registers are available. 17450 bool UseGPRForF16_F32 = true; 17451 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 17452 // variadic argument, or if no F64 argument registers are available. 17453 bool UseGPRForF64 = true; 17454 17455 switch (ABI) { 17456 default: 17457 llvm_unreachable("Unexpected ABI"); 17458 case RISCVABI::ABI_ILP32: 17459 case RISCVABI::ABI_ILP32E: 17460 case RISCVABI::ABI_LP64: 17461 case RISCVABI::ABI_LP64E: 17462 break; 17463 case RISCVABI::ABI_ILP32F: 17464 case RISCVABI::ABI_LP64F: 17465 UseGPRForF16_F32 = !IsFixed; 17466 break; 17467 case RISCVABI::ABI_ILP32D: 17468 case RISCVABI::ABI_LP64D: 17469 UseGPRForF16_F32 = !IsFixed; 17470 UseGPRForF64 = !IsFixed; 17471 break; 17472 } 17473 17474 // FPR16, FPR32, and FPR64 alias each other. 17475 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { 17476 UseGPRForF16_F32 = true; 17477 UseGPRForF64 = true; 17478 } 17479 17480 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 17481 // similar local variables rather than directly checking against the target 17482 // ABI. 17483 17484 if (UseGPRForF16_F32 && 17485 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { 17486 LocVT = XLenVT; 17487 LocInfo = CCValAssign::BCvt; 17488 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 17489 LocVT = MVT::i64; 17490 LocInfo = CCValAssign::BCvt; 17491 } 17492 17493 ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); 17494 17495 // If this is a variadic argument, the RISC-V calling convention requires 17496 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 17497 // alignment (RV32) or 16-byte alignment (RV64). 
An aligned register should 17498 // be used regardless of whether the original argument was split during 17499 // legalisation or not. The argument will not be passed by registers if the 17500 // original type is larger than 2*XLEN, so the register alignment rule does 17501 // not apply. 17502 // TODO: To be compatible with GCC's behaviors, we don't align registers 17503 // currently if we are using ILP32E calling convention. This behavior may be 17504 // changed when RV32E/ILP32E is ratified. 17505 unsigned TwoXLenInBytes = (2 * XLen) / 8; 17506 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 17507 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && 17508 ABI != RISCVABI::ABI_ILP32E) { 17509 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 17510 // Skip 'odd' register if necessary. 17511 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 17512 State.AllocateReg(ArgGPRs); 17513 } 17514 17515 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 17516 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 17517 State.getPendingArgFlags(); 17518 17519 assert(PendingLocs.size() == PendingArgFlags.size() && 17520 "PendingLocs and PendingArgFlags out of sync"); 17521 17522 // Handle passing f64 on RV32D with a soft float ABI or when floating point 17523 // registers are exhausted. 17524 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 17525 assert(PendingLocs.empty() && "Can't lower f64 if it is split"); 17526 // Depending on available argument GPRS, f64 may be passed in a pair of 17527 // GPRs, split between a GPR and the stack, or passed completely on the 17528 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 17529 // cases. 17530 Register Reg = State.AllocateReg(ArgGPRs); 17531 if (!Reg) { 17532 unsigned StackOffset = State.AllocateStack(8, Align(8)); 17533 State.addLoc( 17534 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17535 return false; 17536 } 17537 LocVT = MVT::i32; 17538 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17539 Register HiReg = State.AllocateReg(ArgGPRs); 17540 if (HiReg) { 17541 State.addLoc( 17542 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); 17543 } else { 17544 unsigned StackOffset = State.AllocateStack(4, Align(4)); 17545 State.addLoc( 17546 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17547 } 17548 return false; 17549 } 17550 17551 // Fixed-length vectors are located in the corresponding scalable-vector 17552 // container types. 17553 if (ValVT.isFixedLengthVector()) 17554 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 17555 17556 // Split arguments might be passed indirectly, so keep track of the pending 17557 // values. Split vectors are passed via a mix of registers and indirectly, so 17558 // treat them as we would any other argument. 17559 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 17560 LocVT = XLenVT; 17561 LocInfo = CCValAssign::Indirect; 17562 PendingLocs.push_back( 17563 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 17564 PendingArgFlags.push_back(ArgFlags); 17565 if (!ArgFlags.isSplitEnd()) { 17566 return false; 17567 } 17568 } 17569 17570 // If the split argument only had two elements, it should be passed directly 17571 // in registers or on the stack. 
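  // Illustrative example: on RV32, an i64 argument is legalised into two i32
  // halves; CC_RISCVAssign2XLen below then either places both halves in a GPR
  // pair, splits them between the last available GPR and the stack, or puts
  // both halves on the stack.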
17572 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 17573 PendingLocs.size() <= 2) { 17574 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 17575 // Apply the normal calling convention rules to the first half of the 17576 // split argument. 17577 CCValAssign VA = PendingLocs[0]; 17578 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 17579 PendingLocs.clear(); 17580 PendingArgFlags.clear(); 17581 return CC_RISCVAssign2XLen( 17582 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, 17583 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); 17584 } 17585 17586 // Allocate to a register if possible, or else a stack slot. 17587 Register Reg; 17588 unsigned StoreSizeBytes = XLen / 8; 17589 Align StackAlign = Align(XLen / 8); 17590 17591 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) 17592 Reg = State.AllocateReg(ArgFPR16s); 17593 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 17594 Reg = State.AllocateReg(ArgFPR32s); 17595 else if (ValVT == MVT::f64 && !UseGPRForF64) 17596 Reg = State.AllocateReg(ArgFPR64s); 17597 else if (ValVT.isVector()) { 17598 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); 17599 if (!Reg) { 17600 // For return values, the vector must be passed fully via registers or 17601 // via the stack. 17602 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 17603 // but we're using all of them. 17604 if (IsRet) 17605 return true; 17606 // Try using a GPR to pass the address 17607 if ((Reg = State.AllocateReg(ArgGPRs))) { 17608 LocVT = XLenVT; 17609 LocInfo = CCValAssign::Indirect; 17610 } else if (ValVT.isScalableVector()) { 17611 LocVT = XLenVT; 17612 LocInfo = CCValAssign::Indirect; 17613 } else { 17614 // Pass fixed-length vectors on the stack. 17615 LocVT = ValVT; 17616 StoreSizeBytes = ValVT.getStoreSize(); 17617 // Align vectors to their element sizes, being careful for vXi1 17618 // vectors. 17619 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 17620 } 17621 } 17622 } else { 17623 Reg = State.AllocateReg(ArgGPRs); 17624 } 17625 17626 unsigned StackOffset = 17627 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 17628 17629 // If we reach this point and PendingLocs is non-empty, we must be at the 17630 // end of a split argument that must be passed indirectly. 17631 if (!PendingLocs.empty()) { 17632 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 17633 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 17634 17635 for (auto &It : PendingLocs) { 17636 if (Reg) 17637 It.convertToReg(Reg); 17638 else 17639 It.convertToMem(StackOffset); 17640 State.addLoc(It); 17641 } 17642 PendingLocs.clear(); 17643 PendingArgFlags.clear(); 17644 return false; 17645 } 17646 17647 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 17648 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 17649 "Expected an XLenVT or vector types at this stage"); 17650 17651 if (Reg) { 17652 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17653 return false; 17654 } 17655 17656 // When a scalar floating-point value is passed on the stack, no 17657 // bit-conversion is needed. 
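  // Illustrative example: an f32 argument that spills to the stack under a
  // soft-float ABI is stored directly as an f32 value in its XLEN-sized slot
  // rather than first being bit-converted to an XLEN integer.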
17658 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { 17659 assert(!ValVT.isVector()); 17660 LocVT = ValVT; 17661 LocInfo = CCValAssign::Full; 17662 } 17663 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 17664 return false; 17665} 17666 17667template <typename ArgTy> 17668static std::optional<unsigned> preAssignMask(const ArgTy &Args) { 17669 for (const auto &ArgIdx : enumerate(Args)) { 17670 MVT ArgVT = ArgIdx.value().VT; 17671 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 17672 return ArgIdx.index(); 17673 } 17674 return std::nullopt; 17675} 17676 17677void RISCVTargetLowering::analyzeInputArgs( 17678 MachineFunction &MF, CCState &CCInfo, 17679 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 17680 RISCVCCAssignFn Fn) const { 17681 unsigned NumArgs = Ins.size(); 17682 FunctionType *FType = MF.getFunction().getFunctionType(); 17683 17684 std::optional<unsigned> FirstMaskArgument; 17685 if (Subtarget.hasVInstructions()) 17686 FirstMaskArgument = preAssignMask(Ins); 17687 17688 for (unsigned i = 0; i != NumArgs; ++i) { 17689 MVT ArgVT = Ins[i].VT; 17690 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 17691 17692 Type *ArgTy = nullptr; 17693 if (IsRet) 17694 ArgTy = FType->getReturnType(); 17695 else if (Ins[i].isOrigArg()) 17696 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 17697 17698 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 17699 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 17700 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 17701 FirstMaskArgument)) { 17702 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 17703 << ArgVT << '\n'); 17704 llvm_unreachable(nullptr); 17705 } 17706 } 17707} 17708 17709void RISCVTargetLowering::analyzeOutputArgs( 17710 MachineFunction &MF, CCState &CCInfo, 17711 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 17712 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 17713 unsigned NumArgs = Outs.size(); 17714 17715 std::optional<unsigned> FirstMaskArgument; 17716 if (Subtarget.hasVInstructions()) 17717 FirstMaskArgument = preAssignMask(Outs); 17718 17719 for (unsigned i = 0; i != NumArgs; i++) { 17720 MVT ArgVT = Outs[i].VT; 17721 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 17722 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 17723 17724 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 17725 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 17726 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 17727 FirstMaskArgument)) { 17728 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 17729 << ArgVT << "\n"); 17730 llvm_unreachable(nullptr); 17731 } 17732 } 17733} 17734 17735// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 17736// values. 
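// Illustrative example: under lp64, an f32 value passed in a GPR arrives as
// the low 32 bits of an i64 (CCValAssign::BCvt); it is recovered below with
// FMV_W_X_RV64 (or a truncate plus bitcast when RV64LegalI32 is enabled).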
17737static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 17738 const CCValAssign &VA, const SDLoc &DL, 17739 const RISCVSubtarget &Subtarget) { 17740 switch (VA.getLocInfo()) { 17741 default: 17742 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17743 case CCValAssign::Full: 17744 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 17745 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 17746 break; 17747 case CCValAssign::BCvt: 17748 if (VA.getLocVT().isInteger() && 17749 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 17750 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); 17751 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 17752 if (RV64LegalI32) { 17753 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); 17754 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 17755 } else { 17756 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 17757 } 17758 } else { 17759 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 17760 } 17761 break; 17762 } 17763 return Val; 17764} 17765 17766// The caller is responsible for loading the full value if the argument is 17767// passed with CCValAssign::Indirect. 17768static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 17769 const CCValAssign &VA, const SDLoc &DL, 17770 const ISD::InputArg &In, 17771 const RISCVTargetLowering &TLI) { 17772 MachineFunction &MF = DAG.getMachineFunction(); 17773 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 17774 EVT LocVT = VA.getLocVT(); 17775 SDValue Val; 17776 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 17777 Register VReg = RegInfo.createVirtualRegister(RC); 17778 RegInfo.addLiveIn(VA.getLocReg(), VReg); 17779 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 17780 17781 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. 17782 if (In.isOrigArg()) { 17783 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 17784 if (OrigArg->getType()->isIntegerTy()) { 17785 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 17786 // An input zero extended from i31 can also be considered sign extended. 
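    // For example, an i16 argument with the zeroext attribute has bit 31
    // known to be zero, so it is also sign-extended from 32 bits and its
    // live-in register can safely be recorded for the SExtWRemoval pass.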
17787 if ((BitWidth <= 32 && In.Flags.isSExt()) || 17788 (BitWidth < 32 && In.Flags.isZExt())) { 17789 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 17790 RVFI->addSExt32Register(VReg); 17791 } 17792 } 17793 } 17794 17795 if (VA.getLocInfo() == CCValAssign::Indirect) 17796 return Val; 17797 17798 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 17799} 17800 17801static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 17802 const CCValAssign &VA, const SDLoc &DL, 17803 const RISCVSubtarget &Subtarget) { 17804 EVT LocVT = VA.getLocVT(); 17805 17806 switch (VA.getLocInfo()) { 17807 default: 17808 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17809 case CCValAssign::Full: 17810 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 17811 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 17812 break; 17813 case CCValAssign::BCvt: 17814 if (LocVT.isInteger() && 17815 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { 17816 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); 17817 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { 17818 if (RV64LegalI32) { 17819 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); 17820 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); 17821 } else { 17822 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 17823 } 17824 } else { 17825 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 17826 } 17827 break; 17828 } 17829 return Val; 17830} 17831 17832// The caller is responsible for loading the full value if the argument is 17833// passed with CCValAssign::Indirect. 17834static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 17835 const CCValAssign &VA, const SDLoc &DL) { 17836 MachineFunction &MF = DAG.getMachineFunction(); 17837 MachineFrameInfo &MFI = MF.getFrameInfo(); 17838 EVT LocVT = VA.getLocVT(); 17839 EVT ValVT = VA.getValVT(); 17840 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 17841 if (ValVT.isScalableVector()) { 17842 // When the value is a scalable vector, we save the pointer which points to 17843 // the scalable vector value in the stack. The ValVT will be the pointer 17844 // type, instead of the scalable vector type. 
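    // Illustrative example: on RV64, an indirectly passed <vscale x 4 x i32>
    // argument occupies an 8-byte (XLEN) stack slot that holds the vector's
    // address, so the fixed object below is created with the pointer size.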
17845 ValVT = LocVT; 17846 } 17847 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 17848 /*IsImmutable=*/true); 17849 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 17850 SDValue Val; 17851 17852 ISD::LoadExtType ExtType; 17853 switch (VA.getLocInfo()) { 17854 default: 17855 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 17856 case CCValAssign::Full: 17857 case CCValAssign::Indirect: 17858 case CCValAssign::BCvt: 17859 ExtType = ISD::NON_EXTLOAD; 17860 break; 17861 } 17862 Val = DAG.getExtLoad( 17863 ExtType, DL, LocVT, Chain, FIN, 17864 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 17865 return Val; 17866} 17867 17868static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 17869 const CCValAssign &VA, 17870 const CCValAssign &HiVA, 17871 const SDLoc &DL) { 17872 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 17873 "Unexpected VA"); 17874 MachineFunction &MF = DAG.getMachineFunction(); 17875 MachineFrameInfo &MFI = MF.getFrameInfo(); 17876 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 17877 17878 assert(VA.isRegLoc() && "Expected register VA assignment"); 17879 17880 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 17881 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 17882 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 17883 SDValue Hi; 17884 if (HiVA.isMemLoc()) { 17885 // Second half of f64 is passed on the stack. 17886 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), 17887 /*IsImmutable=*/true); 17888 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 17889 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 17890 MachinePointerInfo::getFixedStack(MF, FI)); 17891 } else { 17892 // Second half of f64 is passed in another GPR. 17893 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 17894 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); 17895 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 17896 } 17897 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 17898} 17899 17900 // FastCC has less than 1% performance improvement for some particular 17901 // benchmarks. But theoretically, it may have benefits in some cases.
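// Illustrative example: with the standard ILP32/LP64 ABIs, FastCC can pass up
// to 13 integer arguments in registers (a0-a7, t2 and t3-t6, see
// getFastCCArgGPRs) before falling back to the stack, compared with 8 for the
// normal calling convention.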
17902bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, 17903 unsigned ValNo, MVT ValVT, MVT LocVT, 17904 CCValAssign::LocInfo LocInfo, 17905 ISD::ArgFlagsTy ArgFlags, CCState &State, 17906 bool IsFixed, bool IsRet, Type *OrigTy, 17907 const RISCVTargetLowering &TLI, 17908 std::optional<unsigned> FirstMaskArgument) { 17909 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 17910 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17911 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17912 return false; 17913 } 17914 } 17915 17916 const RISCVSubtarget &Subtarget = TLI.getSubtarget(); 17917 17918 if (LocVT == MVT::f16 && 17919 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { 17920 static const MCPhysReg FPR16List[] = { 17921 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 17922 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 17923 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 17924 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 17925 if (unsigned Reg = State.AllocateReg(FPR16List)) { 17926 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17927 return false; 17928 } 17929 } 17930 17931 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 17932 static const MCPhysReg FPR32List[] = { 17933 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 17934 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 17935 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 17936 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 17937 if (unsigned Reg = State.AllocateReg(FPR32List)) { 17938 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17939 return false; 17940 } 17941 } 17942 17943 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 17944 static const MCPhysReg FPR64List[] = { 17945 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 17946 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 17947 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 17948 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 17949 if (unsigned Reg = State.AllocateReg(FPR64List)) { 17950 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17951 return false; 17952 } 17953 } 17954 17955 // Check if there is an available GPR before hitting the stack. 
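  // Illustrative example: with Zfinx on RV64, there are no separate FPRs, so
  // an f32 or f64 FastCC argument is placed in one of the FastCC GPRs here
  // when one is still available.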
17956 if ((LocVT == MVT::f16 && 17957 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || 17958 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 17959 (LocVT == MVT::f64 && Subtarget.is64Bit() && 17960 Subtarget.hasStdExtZdinx())) { 17961 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17962 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17963 return false; 17964 } 17965 } 17966 17967 if (LocVT == MVT::f16) { 17968 unsigned Offset2 = State.AllocateStack(2, Align(2)); 17969 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo)); 17970 return false; 17971 } 17972 17973 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 17974 unsigned Offset4 = State.AllocateStack(4, Align(4)); 17975 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 17976 return false; 17977 } 17978 17979 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 17980 unsigned Offset5 = State.AllocateStack(8, Align(8)); 17981 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 17982 return false; 17983 } 17984 17985 if (LocVT.isVector()) { 17986 if (unsigned Reg = 17987 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { 17988 // Fixed-length vectors are located in the corresponding scalable-vector 17989 // container types. 17990 if (ValVT.isFixedLengthVector()) 17991 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 17992 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 17993 } else { 17994 // Try and pass the address via a "fast" GPR. 17995 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { 17996 LocInfo = CCValAssign::Indirect; 17997 LocVT = TLI.getSubtarget().getXLenVT(); 17998 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 17999 } else if (ValVT.isFixedLengthVector()) { 18000 auto StackAlign = 18001 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 18002 unsigned StackOffset = 18003 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 18004 State.addLoc( 18005 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 18006 } else { 18007 // Can't pass scalable vectors on the stack. 18008 return true; 18009 } 18010 } 18011 18012 return false; 18013 } 18014 18015 return true; // CC didn't match. 18016} 18017 18018bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 18019 CCValAssign::LocInfo LocInfo, 18020 ISD::ArgFlagsTy ArgFlags, CCState &State) { 18021 if (ArgFlags.isNest()) { 18022 report_fatal_error( 18023 "Attribute 'nest' is not supported in GHC calling convention"); 18024 } 18025 18026 static const MCPhysReg GPRList[] = { 18027 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 18028 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 18029 18030 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 18031 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 18032 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 18033 if (unsigned Reg = State.AllocateReg(GPRList)) { 18034 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18035 return false; 18036 } 18037 } 18038 18039 const RISCVSubtarget &Subtarget = 18040 State.getMachineFunction().getSubtarget<RISCVSubtarget>(); 18041 18042 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { 18043 // Pass in STG registers: F1, ..., F6 18044 // fs0 ... 
fs5 18045 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 18046 RISCV::F18_F, RISCV::F19_F, 18047 RISCV::F20_F, RISCV::F21_F}; 18048 if (unsigned Reg = State.AllocateReg(FPR32List)) { 18049 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18050 return false; 18051 } 18052 } 18053 18054 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { 18055 // Pass in STG registers: D1, ..., D6 18056 // fs6 ... fs11 18057 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 18058 RISCV::F24_D, RISCV::F25_D, 18059 RISCV::F26_D, RISCV::F27_D}; 18060 if (unsigned Reg = State.AllocateReg(FPR64List)) { 18061 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18062 return false; 18063 } 18064 } 18065 18066 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || 18067 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && 18068 Subtarget.is64Bit())) { 18069 if (unsigned Reg = State.AllocateReg(GPRList)) { 18070 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 18071 return false; 18072 } 18073 } 18074 18075 report_fatal_error("No registers left in GHC calling convention"); 18076 return true; 18077} 18078 18079// Transform physical registers into virtual registers. 18080SDValue RISCVTargetLowering::LowerFormalArguments( 18081 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 18082 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 18083 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 18084 18085 MachineFunction &MF = DAG.getMachineFunction(); 18086 18087 switch (CallConv) { 18088 default: 18089 report_fatal_error("Unsupported calling convention"); 18090 case CallingConv::C: 18091 case CallingConv::Fast: 18092 case CallingConv::SPIR_KERNEL: 18093 case CallingConv::GRAAL: 18094 break; 18095 case CallingConv::GHC: 18096 if (Subtarget.isRVE()) 18097 report_fatal_error("GHC calling convention is not supported on RVE!"); 18098 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) 18099 report_fatal_error("GHC calling convention requires the (Zfinx/F) and " 18100 "(Zdinx/D) instruction set extensions"); 18101 } 18102 18103 const Function &Func = MF.getFunction(); 18104 if (Func.hasFnAttribute("interrupt")) { 18105 if (!Func.arg_empty()) 18106 report_fatal_error( 18107 "Functions with the interrupt attribute cannot have arguments!"); 18108 18109 StringRef Kind = 18110 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 18111 18112 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 18113 report_fatal_error( 18114 "Function interrupt attribute argument not supported!"); 18115 } 18116 18117 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 18118 MVT XLenVT = Subtarget.getXLenVT(); 18119 unsigned XLenInBytes = Subtarget.getXLen() / 8; 18120 // Used with varargs to accumulate store chains. 18121 std::vector<SDValue> OutChains; 18122 18123 // Assign locations to all of the incoming arguments. 18124 SmallVector<CCValAssign, 16> ArgLocs; 18125 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 18126 18127 if (CallConv == CallingConv::GHC) 18128 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC); 18129 else 18130 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, 18131 CallConv == CallingConv::Fast ?
RISCV::CC_RISCV_FastCC 18132 : RISCV::CC_RISCV); 18133 18134 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { 18135 CCValAssign &VA = ArgLocs[i]; 18136 SDValue ArgValue; 18137 // Passing f64 on RV32D with a soft float ABI must be handled as a special 18138 // case. 18139 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18140 assert(VA.needsCustom()); 18141 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); 18142 } else if (VA.isRegLoc()) 18143 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); 18144 else 18145 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 18146 18147 if (VA.getLocInfo() == CCValAssign::Indirect) { 18148 // If the original argument was split and passed by reference (e.g. i128 18149 // on RV32), we need to load all parts of it here (using the same 18150 // address). Vectors may be partly split to registers and partly to the 18151 // stack, in which case the base address is partly offset and subsequent 18152 // stores are relative to that. 18153 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 18154 MachinePointerInfo())); 18155 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; 18156 unsigned ArgPartOffset = Ins[InsIdx].PartOffset; 18157 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 18158 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { 18159 CCValAssign &PartVA = ArgLocs[i + 1]; 18160 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; 18161 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 18162 if (PartVA.getValVT().isScalableVector()) 18163 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 18164 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 18165 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 18166 MachinePointerInfo())); 18167 ++i; 18168 ++InsIdx; 18169 } 18170 continue; 18171 } 18172 InVals.push_back(ArgValue); 18173 } 18174 18175 if (any_of(ArgLocs, 18176 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 18177 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 18178 18179 if (IsVarArg) { 18180 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); 18181 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 18182 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 18183 MachineFrameInfo &MFI = MF.getFrameInfo(); 18184 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 18185 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 18186 18187 // Size of the vararg save area. For now, the varargs save area is either 18188 // zero or large enough to hold a0-a7. 18189 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 18190 int FI; 18191 18192 // If all registers are allocated, then all varargs must be passed on the 18193 // stack and we don't need to save any argregs. 18194 if (VarArgsSaveSize == 0) { 18195 int VaArgOffset = CCInfo.getStackSize(); 18196 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 18197 } else { 18198 int VaArgOffset = -VarArgsSaveSize; 18199 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); 18200 18201 // If saving an odd number of registers, then create an extra stack slot to 18202 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 18203 // offsets to even-numbered registers remain 2*XLEN-aligned.
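      // Illustrative example: on RV32 with one fixed integer argument, the
      // save area starts at a1 (Idx == 1); saving the odd count of seven
      // registers (a1-a7) plus the extra 4-byte slot created below keeps the
      // area 2*XLEN (8-byte) aligned.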
18204 if (Idx % 2) { 18205 MFI.CreateFixedObject( 18206 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true); 18207 VarArgsSaveSize += XLenInBytes; 18208 } 18209 18210 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 18211 18212 // Copy the integer registers that may have been used for passing varargs 18213 // to the vararg save area. 18214 for (unsigned I = Idx; I < ArgRegs.size(); ++I) { 18215 const Register Reg = RegInfo.createVirtualRegister(RC); 18216 RegInfo.addLiveIn(ArgRegs[I], Reg); 18217 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 18218 SDValue Store = DAG.getStore( 18219 Chain, DL, ArgValue, FIN, 18220 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes)); 18221 OutChains.push_back(Store); 18222 FIN = 18223 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL); 18224 } 18225 } 18226 18227 // Record the frame index of the first variable argument 18228 // which is a value necessary to VASTART. 18229 RVFI->setVarArgsFrameIndex(FI); 18230 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 18231 } 18232 18233 // All stores are grouped in one node to allow the matching between 18234 // the size of Ins and InVals. This only happens for vararg functions. 18235 if (!OutChains.empty()) { 18236 OutChains.push_back(Chain); 18237 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 18238 } 18239 18240 return Chain; 18241} 18242 18243/// isEligibleForTailCallOptimization - Check whether the call is eligible 18244/// for tail call optimization. 18245/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 18246bool RISCVTargetLowering::isEligibleForTailCallOptimization( 18247 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 18248 const SmallVector<CCValAssign, 16> &ArgLocs) const { 18249 18250 auto CalleeCC = CLI.CallConv; 18251 auto &Outs = CLI.Outs; 18252 auto &Caller = MF.getFunction(); 18253 auto CallerCC = Caller.getCallingConv(); 18254 18255 // Exception-handling functions need a special set of instructions to 18256 // indicate a return to the hardware. Tail-calling another function would 18257 // probably break this. 18258 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 18259 // should be expanded as new function attributes are introduced. 18260 if (Caller.hasFnAttribute("interrupt")) 18261 return false; 18262 18263 // Do not tail call opt if the stack is used to pass parameters. 18264 if (CCInfo.getStackSize() != 0) 18265 return false; 18266 18267 // Do not tail call opt if any parameters need to be passed indirectly. 18268 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 18269 // passed indirectly. So the address of the value will be passed in a 18270 // register, or if not available, then the address is put on the stack. In 18271 // order to pass indirectly, space on the stack often needs to be allocated 18272 // in order to store the value. In this case the CCInfo.getNextStackOffset() 18273 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 18274 // are passed CCValAssign::Indirect. 18275 for (auto &VA : ArgLocs) 18276 if (VA.getLocInfo() == CCValAssign::Indirect) 18277 return false; 18278 18279 // Do not tail call opt if either caller or callee uses struct return 18280 // semantics. 18281 auto IsCallerStructRet = Caller.hasStructRetAttr(); 18282 auto IsCalleeStructRet = Outs.empty() ? 
false : Outs[0].Flags.isSRet(); 18283 if (IsCallerStructRet || IsCalleeStructRet) 18284 return false; 18285 18286 // The callee has to preserve all registers the caller needs to preserve. 18287 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 18288 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 18289 if (CalleeCC != CallerCC) { 18290 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 18291 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 18292 return false; 18293 } 18294 18295 // Byval parameters hand the function a pointer directly into the stack area 18296 // we want to reuse during a tail call. Working around this *is* possible 18297 // but less efficient and uglier in LowerCall. 18298 for (auto &Arg : Outs) 18299 if (Arg.Flags.isByVal()) 18300 return false; 18301 18302 return true; 18303} 18304 18305static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 18306 return DAG.getDataLayout().getPrefTypeAlign( 18307 VT.getTypeForEVT(*DAG.getContext())); 18308} 18309 18310// Lower a call to a callseq_start + CALL + callseq_end chain, and add input 18311// and output parameter nodes. 18312SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 18313 SmallVectorImpl<SDValue> &InVals) const { 18314 SelectionDAG &DAG = CLI.DAG; 18315 SDLoc &DL = CLI.DL; 18316 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 18317 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 18318 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 18319 SDValue Chain = CLI.Chain; 18320 SDValue Callee = CLI.Callee; 18321 bool &IsTailCall = CLI.IsTailCall; 18322 CallingConv::ID CallConv = CLI.CallConv; 18323 bool IsVarArg = CLI.IsVarArg; 18324 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 18325 MVT XLenVT = Subtarget.getXLenVT(); 18326 18327 MachineFunction &MF = DAG.getMachineFunction(); 18328 18329 // Analyze the operands of the call, assigning locations to each operand. 18330 SmallVector<CCValAssign, 16> ArgLocs; 18331 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 18332 18333 if (CallConv == CallingConv::GHC) { 18334 if (Subtarget.isRVE()) 18335 report_fatal_error("GHC calling convention is not supported on RVE!"); 18336 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); 18337 } else 18338 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 18339 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC 18340 : RISCV::CC_RISCV); 18341 18342 // Check if it's really possible to do a tail call. 18343 if (IsTailCall) 18344 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 18345 18346 if (IsTailCall) 18347 ++NumTailCalls; 18348 else if (CLI.CB && CLI.CB->isMustTailCall()) 18349 report_fatal_error("failed to perform tail call elimination on a call " 18350 "site marked musttail"); 18351 18352 // Get a count of how many bytes are to be pushed on the stack. 
18353 unsigned NumBytes = ArgCCInfo.getStackSize(); 18354 18355 // Create local copies for byval args 18356 SmallVector<SDValue, 8> ByValArgs; 18357 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 18358 ISD::ArgFlagsTy Flags = Outs[i].Flags; 18359 if (!Flags.isByVal()) 18360 continue; 18361 18362 SDValue Arg = OutVals[i]; 18363 unsigned Size = Flags.getByValSize(); 18364 Align Alignment = Flags.getNonZeroByValAlign(); 18365 18366 int FI = 18367 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 18368 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 18369 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 18370 18371 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 18372 /*IsVolatile=*/false, 18373 /*AlwaysInline=*/false, IsTailCall, 18374 MachinePointerInfo(), MachinePointerInfo()); 18375 ByValArgs.push_back(FIPtr); 18376 } 18377 18378 if (!IsTailCall) 18379 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 18380 18381 // Copy argument values to their designated locations. 18382 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 18383 SmallVector<SDValue, 8> MemOpChains; 18384 SDValue StackPtr; 18385 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; 18386 ++i, ++OutIdx) { 18387 CCValAssign &VA = ArgLocs[i]; 18388 SDValue ArgValue = OutVals[OutIdx]; 18389 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; 18390 18391 // Handle passing f64 on RV32D with a soft float ABI as a special case. 18392 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18393 assert(VA.isRegLoc() && "Expected register VA assignment"); 18394 assert(VA.needsCustom()); 18395 SDValue SplitF64 = DAG.getNode( 18396 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 18397 SDValue Lo = SplitF64.getValue(0); 18398 SDValue Hi = SplitF64.getValue(1); 18399 18400 Register RegLo = VA.getLocReg(); 18401 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 18402 18403 // Get the CCValAssign for the Hi part. 18404 CCValAssign &HiVA = ArgLocs[++i]; 18405 18406 if (HiVA.isMemLoc()) { 18407 // Second half of f64 is passed on the stack. 18408 if (!StackPtr.getNode()) 18409 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 18410 SDValue Address = 18411 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 18412 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); 18413 // Emit the store. 18414 MemOpChains.push_back( 18415 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); 18416 } else { 18417 // Second half of f64 is passed in another GPR. 18418 Register RegHigh = HiVA.getLocReg(); 18419 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 18420 } 18421 continue; 18422 } 18423 18424 // Promote the value if needed. 18425 // For now, only handle fully promoted and indirect arguments. 18426 if (VA.getLocInfo() == CCValAssign::Indirect) { 18427 // Store the argument in a stack slot and pass its address. 18428 Align StackAlign = 18429 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), 18430 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 18431 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 18432 // If the original argument was split (e.g. i128), we need 18433 // to store the required parts of it here (and pass just one address). 18434 // Vectors may be partly split to registers and partly to the stack, in 18435 // which case the base address is partly offset and subsequent stores are 18436 // relative to that. 
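      // Illustrative example: an i128 argument on RV32 is legalised into four
      // i32 parts; every part is stored to the stack temporary created below
      // and only the temporary's address is passed (in a GPR if available).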
18437 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; 18438 unsigned ArgPartOffset = Outs[OutIdx].PartOffset; 18439 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 18440 // Calculate the total size to store. We don't have access to what we're 18441 // actually storing other than performing the loop and collecting the 18442 // info. 18443 SmallVector<std::pair<SDValue, SDValue>> Parts; 18444 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { 18445 SDValue PartValue = OutVals[OutIdx + 1]; 18446 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; 18447 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 18448 EVT PartVT = PartValue.getValueType(); 18449 if (PartVT.isScalableVector()) 18450 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 18451 StoredSize += PartVT.getStoreSize(); 18452 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 18453 Parts.push_back(std::make_pair(PartValue, Offset)); 18454 ++i; 18455 ++OutIdx; 18456 } 18457 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 18458 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 18459 MemOpChains.push_back( 18460 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 18461 MachinePointerInfo::getFixedStack(MF, FI))); 18462 for (const auto &Part : Parts) { 18463 SDValue PartValue = Part.first; 18464 SDValue PartOffset = Part.second; 18465 SDValue Address = 18466 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 18467 MemOpChains.push_back( 18468 DAG.getStore(Chain, DL, PartValue, Address, 18469 MachinePointerInfo::getFixedStack(MF, FI))); 18470 } 18471 ArgValue = SpillSlot; 18472 } else { 18473 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 18474 } 18475 18476 // Use local copy if it is a byval arg. 18477 if (Flags.isByVal()) 18478 ArgValue = ByValArgs[j++]; 18479 18480 if (VA.isRegLoc()) { 18481 // Queue up the argument copies and emit them at the end. 18482 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 18483 } else { 18484 assert(VA.isMemLoc() && "Argument not register or memory"); 18485 assert(!IsTailCall && "Tail call not allowed if stack is used " 18486 "for passing parameters"); 18487 18488 // Work out the address of the stack slot. 18489 if (!StackPtr.getNode()) 18490 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 18491 SDValue Address = 18492 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 18493 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 18494 18495 // Emit the store. 18496 MemOpChains.push_back( 18497 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 18498 } 18499 } 18500 18501 // Join the stores, which are independent of one another. 18502 if (!MemOpChains.empty()) 18503 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 18504 18505 SDValue Glue; 18506 18507 // Build a sequence of copy-to-reg nodes, chained and glued together. 18508 for (auto &Reg : RegsToPass) { 18509 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 18510 Glue = Chain.getValue(1); 18511 } 18512 18513 // Validate that none of the argument registers have been marked as 18514 // reserved, if so report an error. Do the same for the return address if this 18515 // is not a tailcall. 
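  // Illustrative example: if the user has reserved a0 (e.g. via the
  // reserve-x10 subtarget feature), a call that would pass its first integer
  // argument in a0 is diagnosed here instead of silently clobbering it.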
18516 validateCCReservedRegs(RegsToPass, MF); 18517 if (!IsTailCall && 18518 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 18519 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18520 MF.getFunction(), 18521 "Return address register required, but has been reserved."}); 18522 18523 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 18524 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 18525 // split it and then direct call can be matched by PseudoCALL. 18526 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 18527 const GlobalValue *GV = S->getGlobal(); 18528 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); 18529 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 18530 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL); 18531 } 18532 18533 // The first call operand is the chain and the second is the target address. 18534 SmallVector<SDValue, 8> Ops; 18535 Ops.push_back(Chain); 18536 Ops.push_back(Callee); 18537 18538 // Add argument registers to the end of the list so that they are 18539 // known live into the call. 18540 for (auto &Reg : RegsToPass) 18541 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 18542 18543 if (!IsTailCall) { 18544 // Add a register mask operand representing the call-preserved registers. 18545 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 18546 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 18547 assert(Mask && "Missing call preserved mask for calling convention"); 18548 Ops.push_back(DAG.getRegisterMask(Mask)); 18549 } 18550 18551 // Glue the call to the argument copies, if any. 18552 if (Glue.getNode()) 18553 Ops.push_back(Glue); 18554 18555 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && 18556 "Unexpected CFI type for a direct call"); 18557 18558 // Emit the call. 18559 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 18560 18561 if (IsTailCall) { 18562 MF.getFrameInfo().setHasTailCall(); 18563 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 18564 if (CLI.CFIType) 18565 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 18566 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 18567 return Ret; 18568 } 18569 18570 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 18571 if (CLI.CFIType) 18572 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); 18573 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 18574 Glue = Chain.getValue(1); 18575 18576 // Mark the end of the call, which is glued to the call itself. 18577 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 18578 Glue = Chain.getValue(1); 18579 18580 // Assign locations to each value returned by this call. 18581 SmallVector<CCValAssign, 16> RVLocs; 18582 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 18583 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); 18584 18585 // Copy all of the result registers out of their specified physreg. 
18586 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 18587 auto &VA = RVLocs[i]; 18588 // Copy the value out 18589 SDValue RetValue = 18590 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 18591 // Glue the RetValue to the end of the call sequence 18592 Chain = RetValue.getValue(1); 18593 Glue = RetValue.getValue(2); 18594 18595 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18596 assert(VA.needsCustom()); 18597 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), 18598 MVT::i32, Glue); 18599 Chain = RetValue2.getValue(1); 18600 Glue = RetValue2.getValue(2); 18601 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 18602 RetValue2); 18603 } 18604 18605 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 18606 18607 InVals.push_back(RetValue); 18608 } 18609 18610 return Chain; 18611} 18612 18613bool RISCVTargetLowering::CanLowerReturn( 18614 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 18615 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 18616 SmallVector<CCValAssign, 16> RVLocs; 18617 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 18618 18619 std::optional<unsigned> FirstMaskArgument; 18620 if (Subtarget.hasVInstructions()) 18621 FirstMaskArgument = preAssignMask(Outs); 18622 18623 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 18624 MVT VT = Outs[i].VT; 18625 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 18626 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 18627 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 18628 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 18629 *this, FirstMaskArgument)) 18630 return false; 18631 } 18632 return true; 18633} 18634 18635SDValue 18636RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 18637 bool IsVarArg, 18638 const SmallVectorImpl<ISD::OutputArg> &Outs, 18639 const SmallVectorImpl<SDValue> &OutVals, 18640 const SDLoc &DL, SelectionDAG &DAG) const { 18641 MachineFunction &MF = DAG.getMachineFunction(); 18642 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 18643 18644 // Stores the assignment of the return value to a location. 18645 SmallVector<CCValAssign, 16> RVLocs; 18646 18647 // Info about the registers and stack slot. 18648 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 18649 *DAG.getContext()); 18650 18651 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 18652 nullptr, RISCV::CC_RISCV); 18653 18654 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 18655 report_fatal_error("GHC functions return void only"); 18656 18657 SDValue Glue; 18658 SmallVector<SDValue, 4> RetOps(1, Chain); 18659 18660 // Copy the result values into the output registers. 18661 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { 18662 SDValue Val = OutVals[OutIdx]; 18663 CCValAssign &VA = RVLocs[i]; 18664 assert(VA.isRegLoc() && "Can only return in registers!"); 18665 18666 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 18667 // Handle returning f64 on RV32D with a soft float ABI. 
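      // Illustrative example: an f64 returned under the ilp32 ABI on an RV32D
      // target is split with RISCVISD::SplitF64 into two i32 halves that come
      // back in the a0/a1 register pair.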
18668 assert(VA.isRegLoc() && "Expected return via registers"); 18669 assert(VA.needsCustom()); 18670 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 18671 DAG.getVTList(MVT::i32, MVT::i32), Val); 18672 SDValue Lo = SplitF64.getValue(0); 18673 SDValue Hi = SplitF64.getValue(1); 18674 Register RegLo = VA.getLocReg(); 18675 Register RegHi = RVLocs[++i].getLocReg(); 18676 18677 if (STI.isRegisterReservedByUser(RegLo) || 18678 STI.isRegisterReservedByUser(RegHi)) 18679 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18680 MF.getFunction(), 18681 "Return value register required, but has been reserved."}); 18682 18683 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 18684 Glue = Chain.getValue(1); 18685 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 18686 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 18687 Glue = Chain.getValue(1); 18688 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 18689 } else { 18690 // Handle a 'normal' return. 18691 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 18692 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 18693 18694 if (STI.isRegisterReservedByUser(VA.getLocReg())) 18695 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 18696 MF.getFunction(), 18697 "Return value register required, but has been reserved."}); 18698 18699 // Guarantee that all emitted copies are stuck together. 18700 Glue = Chain.getValue(1); 18701 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 18702 } 18703 } 18704 18705 RetOps[0] = Chain; // Update chain. 18706 18707 // Add the glue node if we have it. 18708 if (Glue.getNode()) { 18709 RetOps.push_back(Glue); 18710 } 18711 18712 if (any_of(RVLocs, 18713 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 18714 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 18715 18716 unsigned RetOpc = RISCVISD::RET_GLUE; 18717 // Interrupt service routines use different return instructions. 18718 const Function &Func = DAG.getMachineFunction().getFunction(); 18719 if (Func.hasFnAttribute("interrupt")) { 18720 if (!Func.getReturnType()->isVoidTy()) 18721 report_fatal_error( 18722 "Functions with the interrupt attribute must have void return type!"); 18723 18724 MachineFunction &MF = DAG.getMachineFunction(); 18725 StringRef Kind = 18726 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 18727 18728 if (Kind == "supervisor") 18729 RetOpc = RISCVISD::SRET_GLUE; 18730 else 18731 RetOpc = RISCVISD::MRET_GLUE; 18732 } 18733 18734 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 18735} 18736 18737void RISCVTargetLowering::validateCCReservedRegs( 18738 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 18739 MachineFunction &MF) const { 18740 const Function &F = MF.getFunction(); 18741 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 18742 18743 if (llvm::any_of(Regs, [&STI](auto Reg) { 18744 return STI.isRegisterReservedByUser(Reg.first); 18745 })) 18746 F.getContext().diagnose(DiagnosticInfoUnsupported{ 18747 F, "Argument register required, but has been reserved."}); 18748} 18749 18750// Check if the result of the node is only used as a return value, as 18751// otherwise we can't perform a tail-call. 
18752bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 18753 if (N->getNumValues() != 1) 18754 return false; 18755 if (!N->hasNUsesOfValue(1, 0)) 18756 return false; 18757 18758 SDNode *Copy = *N->use_begin(); 18759 18760 if (Copy->getOpcode() == ISD::BITCAST) { 18761 return isUsedByReturnOnly(Copy, Chain); 18762 } 18763 18764 // TODO: Handle additional opcodes in order to support tail-calling libcalls 18765 // with soft float ABIs. 18766 if (Copy->getOpcode() != ISD::CopyToReg) { 18767 return false; 18768 } 18769 18770 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 18771 // isn't safe to perform a tail call. 18772 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) 18773 return false; 18774 18775 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. 18776 bool HasRet = false; 18777 for (SDNode *Node : Copy->uses()) { 18778 if (Node->getOpcode() != RISCVISD::RET_GLUE) 18779 return false; 18780 HasRet = true; 18781 } 18782 if (!HasRet) 18783 return false; 18784 18785 Chain = Copy->getOperand(0); 18786 return true; 18787} 18788 18789bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 18790 return CI->isTailCall(); 18791} 18792 18793const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 18794#define NODE_NAME_CASE(NODE) \ 18795 case RISCVISD::NODE: \ 18796 return "RISCVISD::" #NODE; 18797 // clang-format off 18798 switch ((RISCVISD::NodeType)Opcode) { 18799 case RISCVISD::FIRST_NUMBER: 18800 break; 18801 NODE_NAME_CASE(RET_GLUE) 18802 NODE_NAME_CASE(SRET_GLUE) 18803 NODE_NAME_CASE(MRET_GLUE) 18804 NODE_NAME_CASE(CALL) 18805 NODE_NAME_CASE(SELECT_CC) 18806 NODE_NAME_CASE(BR_CC) 18807 NODE_NAME_CASE(BuildPairF64) 18808 NODE_NAME_CASE(SplitF64) 18809 NODE_NAME_CASE(TAIL) 18810 NODE_NAME_CASE(ADD_LO) 18811 NODE_NAME_CASE(HI) 18812 NODE_NAME_CASE(LLA) 18813 NODE_NAME_CASE(ADD_TPREL) 18814 NODE_NAME_CASE(MULHSU) 18815 NODE_NAME_CASE(SLLW) 18816 NODE_NAME_CASE(SRAW) 18817 NODE_NAME_CASE(SRLW) 18818 NODE_NAME_CASE(DIVW) 18819 NODE_NAME_CASE(DIVUW) 18820 NODE_NAME_CASE(REMUW) 18821 NODE_NAME_CASE(ROLW) 18822 NODE_NAME_CASE(RORW) 18823 NODE_NAME_CASE(CLZW) 18824 NODE_NAME_CASE(CTZW) 18825 NODE_NAME_CASE(ABSW) 18826 NODE_NAME_CASE(FMV_H_X) 18827 NODE_NAME_CASE(FMV_X_ANYEXTH) 18828 NODE_NAME_CASE(FMV_X_SIGNEXTH) 18829 NODE_NAME_CASE(FMV_W_X_RV64) 18830 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 18831 NODE_NAME_CASE(FCVT_X) 18832 NODE_NAME_CASE(FCVT_XU) 18833 NODE_NAME_CASE(FCVT_W_RV64) 18834 NODE_NAME_CASE(FCVT_WU_RV64) 18835 NODE_NAME_CASE(STRICT_FCVT_W_RV64) 18836 NODE_NAME_CASE(STRICT_FCVT_WU_RV64) 18837 NODE_NAME_CASE(FP_ROUND_BF16) 18838 NODE_NAME_CASE(FP_EXTEND_BF16) 18839 NODE_NAME_CASE(FROUND) 18840 NODE_NAME_CASE(FCLASS) 18841 NODE_NAME_CASE(FMAX) 18842 NODE_NAME_CASE(FMIN) 18843 NODE_NAME_CASE(READ_CYCLE_WIDE) 18844 NODE_NAME_CASE(BREV8) 18845 NODE_NAME_CASE(ORC_B) 18846 NODE_NAME_CASE(ZIP) 18847 NODE_NAME_CASE(UNZIP) 18848 NODE_NAME_CASE(CLMUL) 18849 NODE_NAME_CASE(CLMULH) 18850 NODE_NAME_CASE(CLMULR) 18851 NODE_NAME_CASE(SHA256SIG0) 18852 NODE_NAME_CASE(SHA256SIG1) 18853 NODE_NAME_CASE(SHA256SUM0) 18854 NODE_NAME_CASE(SHA256SUM1) 18855 NODE_NAME_CASE(SM4KS) 18856 NODE_NAME_CASE(SM4ED) 18857 NODE_NAME_CASE(SM3P0) 18858 NODE_NAME_CASE(SM3P1) 18859 NODE_NAME_CASE(TH_LWD) 18860 NODE_NAME_CASE(TH_LWUD) 18861 NODE_NAME_CASE(TH_LDD) 18862 NODE_NAME_CASE(TH_SWD) 18863 NODE_NAME_CASE(TH_SDD) 18864 NODE_NAME_CASE(VMV_V_V_VL) 18865 NODE_NAME_CASE(VMV_V_X_VL) 18866 
NODE_NAME_CASE(VFMV_V_F_VL) 18867 NODE_NAME_CASE(VMV_X_S) 18868 NODE_NAME_CASE(VMV_S_X_VL) 18869 NODE_NAME_CASE(VFMV_S_F_VL) 18870 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) 18871 NODE_NAME_CASE(READ_VLENB) 18872 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 18873 NODE_NAME_CASE(VSLIDEUP_VL) 18874 NODE_NAME_CASE(VSLIDE1UP_VL) 18875 NODE_NAME_CASE(VSLIDEDOWN_VL) 18876 NODE_NAME_CASE(VSLIDE1DOWN_VL) 18877 NODE_NAME_CASE(VFSLIDE1UP_VL) 18878 NODE_NAME_CASE(VFSLIDE1DOWN_VL) 18879 NODE_NAME_CASE(VID_VL) 18880 NODE_NAME_CASE(VFNCVT_ROD_VL) 18881 NODE_NAME_CASE(VECREDUCE_ADD_VL) 18882 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 18883 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 18884 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 18885 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 18886 NODE_NAME_CASE(VECREDUCE_AND_VL) 18887 NODE_NAME_CASE(VECREDUCE_OR_VL) 18888 NODE_NAME_CASE(VECREDUCE_XOR_VL) 18889 NODE_NAME_CASE(VECREDUCE_FADD_VL) 18890 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 18891 NODE_NAME_CASE(VECREDUCE_FMIN_VL) 18892 NODE_NAME_CASE(VECREDUCE_FMAX_VL) 18893 NODE_NAME_CASE(ADD_VL) 18894 NODE_NAME_CASE(AND_VL) 18895 NODE_NAME_CASE(MUL_VL) 18896 NODE_NAME_CASE(OR_VL) 18897 NODE_NAME_CASE(SDIV_VL) 18898 NODE_NAME_CASE(SHL_VL) 18899 NODE_NAME_CASE(SREM_VL) 18900 NODE_NAME_CASE(SRA_VL) 18901 NODE_NAME_CASE(SRL_VL) 18902 NODE_NAME_CASE(ROTL_VL) 18903 NODE_NAME_CASE(ROTR_VL) 18904 NODE_NAME_CASE(SUB_VL) 18905 NODE_NAME_CASE(UDIV_VL) 18906 NODE_NAME_CASE(UREM_VL) 18907 NODE_NAME_CASE(XOR_VL) 18908 NODE_NAME_CASE(AVGFLOORU_VL) 18909 NODE_NAME_CASE(AVGCEILU_VL) 18910 NODE_NAME_CASE(SADDSAT_VL) 18911 NODE_NAME_CASE(UADDSAT_VL) 18912 NODE_NAME_CASE(SSUBSAT_VL) 18913 NODE_NAME_CASE(USUBSAT_VL) 18914 NODE_NAME_CASE(FADD_VL) 18915 NODE_NAME_CASE(FSUB_VL) 18916 NODE_NAME_CASE(FMUL_VL) 18917 NODE_NAME_CASE(FDIV_VL) 18918 NODE_NAME_CASE(FNEG_VL) 18919 NODE_NAME_CASE(FABS_VL) 18920 NODE_NAME_CASE(FSQRT_VL) 18921 NODE_NAME_CASE(FCLASS_VL) 18922 NODE_NAME_CASE(VFMADD_VL) 18923 NODE_NAME_CASE(VFNMADD_VL) 18924 NODE_NAME_CASE(VFMSUB_VL) 18925 NODE_NAME_CASE(VFNMSUB_VL) 18926 NODE_NAME_CASE(VFWMADD_VL) 18927 NODE_NAME_CASE(VFWNMADD_VL) 18928 NODE_NAME_CASE(VFWMSUB_VL) 18929 NODE_NAME_CASE(VFWNMSUB_VL) 18930 NODE_NAME_CASE(FCOPYSIGN_VL) 18931 NODE_NAME_CASE(SMIN_VL) 18932 NODE_NAME_CASE(SMAX_VL) 18933 NODE_NAME_CASE(UMIN_VL) 18934 NODE_NAME_CASE(UMAX_VL) 18935 NODE_NAME_CASE(BITREVERSE_VL) 18936 NODE_NAME_CASE(BSWAP_VL) 18937 NODE_NAME_CASE(CTLZ_VL) 18938 NODE_NAME_CASE(CTTZ_VL) 18939 NODE_NAME_CASE(CTPOP_VL) 18940 NODE_NAME_CASE(VFMIN_VL) 18941 NODE_NAME_CASE(VFMAX_VL) 18942 NODE_NAME_CASE(MULHS_VL) 18943 NODE_NAME_CASE(MULHU_VL) 18944 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) 18945 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) 18946 NODE_NAME_CASE(VFCVT_RM_X_F_VL) 18947 NODE_NAME_CASE(VFCVT_RM_XU_F_VL) 18948 NODE_NAME_CASE(VFCVT_X_F_VL) 18949 NODE_NAME_CASE(VFCVT_XU_F_VL) 18950 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) 18951 NODE_NAME_CASE(SINT_TO_FP_VL) 18952 NODE_NAME_CASE(UINT_TO_FP_VL) 18953 NODE_NAME_CASE(VFCVT_RM_F_XU_VL) 18954 NODE_NAME_CASE(VFCVT_RM_F_X_VL) 18955 NODE_NAME_CASE(FP_EXTEND_VL) 18956 NODE_NAME_CASE(FP_ROUND_VL) 18957 NODE_NAME_CASE(STRICT_FADD_VL) 18958 NODE_NAME_CASE(STRICT_FSUB_VL) 18959 NODE_NAME_CASE(STRICT_FMUL_VL) 18960 NODE_NAME_CASE(STRICT_FDIV_VL) 18961 NODE_NAME_CASE(STRICT_FSQRT_VL) 18962 NODE_NAME_CASE(STRICT_VFMADD_VL) 18963 NODE_NAME_CASE(STRICT_VFNMADD_VL) 18964 NODE_NAME_CASE(STRICT_VFMSUB_VL) 18965 NODE_NAME_CASE(STRICT_VFNMSUB_VL) 18966 NODE_NAME_CASE(STRICT_FP_ROUND_VL) 18967 NODE_NAME_CASE(STRICT_FP_EXTEND_VL) 18968 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) 18969 
NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) 18970 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) 18971 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) 18972 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) 18973 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) 18974 NODE_NAME_CASE(STRICT_FSETCC_VL) 18975 NODE_NAME_CASE(STRICT_FSETCCS_VL) 18976 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) 18977 NODE_NAME_CASE(VWMUL_VL) 18978 NODE_NAME_CASE(VWMULU_VL) 18979 NODE_NAME_CASE(VWMULSU_VL) 18980 NODE_NAME_CASE(VWADD_VL) 18981 NODE_NAME_CASE(VWADDU_VL) 18982 NODE_NAME_CASE(VWSUB_VL) 18983 NODE_NAME_CASE(VWSUBU_VL) 18984 NODE_NAME_CASE(VWADD_W_VL) 18985 NODE_NAME_CASE(VWADDU_W_VL) 18986 NODE_NAME_CASE(VWSUB_W_VL) 18987 NODE_NAME_CASE(VWSUBU_W_VL) 18988 NODE_NAME_CASE(VWSLL_VL) 18989 NODE_NAME_CASE(VFWMUL_VL) 18990 NODE_NAME_CASE(VFWADD_VL) 18991 NODE_NAME_CASE(VFWSUB_VL) 18992 NODE_NAME_CASE(VFWADD_W_VL) 18993 NODE_NAME_CASE(VFWSUB_W_VL) 18994 NODE_NAME_CASE(VWMACC_VL) 18995 NODE_NAME_CASE(VWMACCU_VL) 18996 NODE_NAME_CASE(VWMACCSU_VL) 18997 NODE_NAME_CASE(VNSRL_VL) 18998 NODE_NAME_CASE(SETCC_VL) 18999 NODE_NAME_CASE(VMERGE_VL) 19000 NODE_NAME_CASE(VMAND_VL) 19001 NODE_NAME_CASE(VMOR_VL) 19002 NODE_NAME_CASE(VMXOR_VL) 19003 NODE_NAME_CASE(VMCLR_VL) 19004 NODE_NAME_CASE(VMSET_VL) 19005 NODE_NAME_CASE(VRGATHER_VX_VL) 19006 NODE_NAME_CASE(VRGATHER_VV_VL) 19007 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 19008 NODE_NAME_CASE(VSEXT_VL) 19009 NODE_NAME_CASE(VZEXT_VL) 19010 NODE_NAME_CASE(VCPOP_VL) 19011 NODE_NAME_CASE(VFIRST_VL) 19012 NODE_NAME_CASE(READ_CSR) 19013 NODE_NAME_CASE(WRITE_CSR) 19014 NODE_NAME_CASE(SWAP_CSR) 19015 NODE_NAME_CASE(CZERO_EQZ) 19016 NODE_NAME_CASE(CZERO_NEZ) 19017 } 19018 // clang-format on 19019 return nullptr; 19020#undef NODE_NAME_CASE 19021} 19022 19023/// getConstraintType - Given a constraint letter, return the type of 19024/// constraint it is for this target. 19025RISCVTargetLowering::ConstraintType 19026RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 19027 if (Constraint.size() == 1) { 19028 switch (Constraint[0]) { 19029 default: 19030 break; 19031 case 'f': 19032 return C_RegisterClass; 19033 case 'I': 19034 case 'J': 19035 case 'K': 19036 return C_Immediate; 19037 case 'A': 19038 return C_Memory; 19039 case 'S': // A symbolic address 19040 return C_Other; 19041 } 19042 } else { 19043 if (Constraint == "vr" || Constraint == "vm") 19044 return C_RegisterClass; 19045 } 19046 return TargetLowering::getConstraintType(Constraint); 19047} 19048 19049std::pair<unsigned, const TargetRegisterClass *> 19050RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 19051 StringRef Constraint, 19052 MVT VT) const { 19053 // First, see if this is a constraint that directly corresponds to a RISC-V 19054 // register class. 19055 if (Constraint.size() == 1) { 19056 switch (Constraint[0]) { 19057 case 'r': 19058 // TODO: Support fixed vectors up to XLen for P extension? 
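      // Illustrative note (not from the original source): on a Zhinxmin/Zfinx/
      // Zdinx target, a scalar FP operand used with the "r" constraint stays in
      // x-registers, so the GPRF16/GPRF32/GPRPair classes below are returned
      // instead of an FPR class.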
19059 if (VT.isVector()) 19060 break; 19061 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) 19062 return std::make_pair(0U, &RISCV::GPRF16RegClass); 19063 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) 19064 return std::make_pair(0U, &RISCV::GPRF32RegClass); 19065 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) 19066 return std::make_pair(0U, &RISCV::GPRPairRegClass); 19067 return std::make_pair(0U, &RISCV::GPRNoX0RegClass); 19068 case 'f': 19069 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) 19070 return std::make_pair(0U, &RISCV::FPR16RegClass); 19071 if (Subtarget.hasStdExtF() && VT == MVT::f32) 19072 return std::make_pair(0U, &RISCV::FPR32RegClass); 19073 if (Subtarget.hasStdExtD() && VT == MVT::f64) 19074 return std::make_pair(0U, &RISCV::FPR64RegClass); 19075 break; 19076 default: 19077 break; 19078 } 19079 } else if (Constraint == "vr") { 19080 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, 19081 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 19082 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 19083 return std::make_pair(0U, RC); 19084 } 19085 } else if (Constraint == "vm") { 19086 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) 19087 return std::make_pair(0U, &RISCV::VMV0RegClass); 19088 } 19089 19090 // Clang will correctly decode the usage of register name aliases into their 19091 // official names. However, other frontends like `rustc` do not. This allows 19092 // users of these frontends to use the ABI names for registers in LLVM-style 19093 // register constraints. 19094 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 19095 .Case("{zero}", RISCV::X0) 19096 .Case("{ra}", RISCV::X1) 19097 .Case("{sp}", RISCV::X2) 19098 .Case("{gp}", RISCV::X3) 19099 .Case("{tp}", RISCV::X4) 19100 .Case("{t0}", RISCV::X5) 19101 .Case("{t1}", RISCV::X6) 19102 .Case("{t2}", RISCV::X7) 19103 .Cases("{s0}", "{fp}", RISCV::X8) 19104 .Case("{s1}", RISCV::X9) 19105 .Case("{a0}", RISCV::X10) 19106 .Case("{a1}", RISCV::X11) 19107 .Case("{a2}", RISCV::X12) 19108 .Case("{a3}", RISCV::X13) 19109 .Case("{a4}", RISCV::X14) 19110 .Case("{a5}", RISCV::X15) 19111 .Case("{a6}", RISCV::X16) 19112 .Case("{a7}", RISCV::X17) 19113 .Case("{s2}", RISCV::X18) 19114 .Case("{s3}", RISCV::X19) 19115 .Case("{s4}", RISCV::X20) 19116 .Case("{s5}", RISCV::X21) 19117 .Case("{s6}", RISCV::X22) 19118 .Case("{s7}", RISCV::X23) 19119 .Case("{s8}", RISCV::X24) 19120 .Case("{s9}", RISCV::X25) 19121 .Case("{s10}", RISCV::X26) 19122 .Case("{s11}", RISCV::X27) 19123 .Case("{t3}", RISCV::X28) 19124 .Case("{t4}", RISCV::X29) 19125 .Case("{t5}", RISCV::X30) 19126 .Case("{t6}", RISCV::X31) 19127 .Default(RISCV::NoRegister); 19128 if (XRegFromAlias != RISCV::NoRegister) 19129 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 19130 19131 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 19132 // TableGen record rather than the AsmName to choose registers for InlineAsm 19133 // constraints, plus we want to match those names to the widest floating point 19134 // register type available, manually select floating point registers here. 19135 // 19136 // The second case is the ABI name of the register, so that frontends can also 19137 // use the ABI names in register constraint lists. 
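  // For example (illustrative, not in the original comments): both "{fa0}" and
  // "{f10}" are matched below to F10; if the D extension is present and the
  // operand type is f64 (or unspecified), the FPR64 view F10_D is returned,
  // otherwise the FPR32 view F10_F (or FPR16 view for f16) is used.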
19138 if (Subtarget.hasStdExtF()) { 19139 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 19140 .Cases("{f0}", "{ft0}", RISCV::F0_F) 19141 .Cases("{f1}", "{ft1}", RISCV::F1_F) 19142 .Cases("{f2}", "{ft2}", RISCV::F2_F) 19143 .Cases("{f3}", "{ft3}", RISCV::F3_F) 19144 .Cases("{f4}", "{ft4}", RISCV::F4_F) 19145 .Cases("{f5}", "{ft5}", RISCV::F5_F) 19146 .Cases("{f6}", "{ft6}", RISCV::F6_F) 19147 .Cases("{f7}", "{ft7}", RISCV::F7_F) 19148 .Cases("{f8}", "{fs0}", RISCV::F8_F) 19149 .Cases("{f9}", "{fs1}", RISCV::F9_F) 19150 .Cases("{f10}", "{fa0}", RISCV::F10_F) 19151 .Cases("{f11}", "{fa1}", RISCV::F11_F) 19152 .Cases("{f12}", "{fa2}", RISCV::F12_F) 19153 .Cases("{f13}", "{fa3}", RISCV::F13_F) 19154 .Cases("{f14}", "{fa4}", RISCV::F14_F) 19155 .Cases("{f15}", "{fa5}", RISCV::F15_F) 19156 .Cases("{f16}", "{fa6}", RISCV::F16_F) 19157 .Cases("{f17}", "{fa7}", RISCV::F17_F) 19158 .Cases("{f18}", "{fs2}", RISCV::F18_F) 19159 .Cases("{f19}", "{fs3}", RISCV::F19_F) 19160 .Cases("{f20}", "{fs4}", RISCV::F20_F) 19161 .Cases("{f21}", "{fs5}", RISCV::F21_F) 19162 .Cases("{f22}", "{fs6}", RISCV::F22_F) 19163 .Cases("{f23}", "{fs7}", RISCV::F23_F) 19164 .Cases("{f24}", "{fs8}", RISCV::F24_F) 19165 .Cases("{f25}", "{fs9}", RISCV::F25_F) 19166 .Cases("{f26}", "{fs10}", RISCV::F26_F) 19167 .Cases("{f27}", "{fs11}", RISCV::F27_F) 19168 .Cases("{f28}", "{ft8}", RISCV::F28_F) 19169 .Cases("{f29}", "{ft9}", RISCV::F29_F) 19170 .Cases("{f30}", "{ft10}", RISCV::F30_F) 19171 .Cases("{f31}", "{ft11}", RISCV::F31_F) 19172 .Default(RISCV::NoRegister); 19173 if (FReg != RISCV::NoRegister) { 19174 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 19175 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { 19176 unsigned RegNo = FReg - RISCV::F0_F; 19177 unsigned DReg = RISCV::F0_D + RegNo; 19178 return std::make_pair(DReg, &RISCV::FPR64RegClass); 19179 } 19180 if (VT == MVT::f32 || VT == MVT::Other) 19181 return std::make_pair(FReg, &RISCV::FPR32RegClass); 19182 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { 19183 unsigned RegNo = FReg - RISCV::F0_F; 19184 unsigned HReg = RISCV::F0_H + RegNo; 19185 return std::make_pair(HReg, &RISCV::FPR16RegClass); 19186 } 19187 } 19188 } 19189 19190 if (Subtarget.hasVInstructions()) { 19191 Register VReg = StringSwitch<Register>(Constraint.lower()) 19192 .Case("{v0}", RISCV::V0) 19193 .Case("{v1}", RISCV::V1) 19194 .Case("{v2}", RISCV::V2) 19195 .Case("{v3}", RISCV::V3) 19196 .Case("{v4}", RISCV::V4) 19197 .Case("{v5}", RISCV::V5) 19198 .Case("{v6}", RISCV::V6) 19199 .Case("{v7}", RISCV::V7) 19200 .Case("{v8}", RISCV::V8) 19201 .Case("{v9}", RISCV::V9) 19202 .Case("{v10}", RISCV::V10) 19203 .Case("{v11}", RISCV::V11) 19204 .Case("{v12}", RISCV::V12) 19205 .Case("{v13}", RISCV::V13) 19206 .Case("{v14}", RISCV::V14) 19207 .Case("{v15}", RISCV::V15) 19208 .Case("{v16}", RISCV::V16) 19209 .Case("{v17}", RISCV::V17) 19210 .Case("{v18}", RISCV::V18) 19211 .Case("{v19}", RISCV::V19) 19212 .Case("{v20}", RISCV::V20) 19213 .Case("{v21}", RISCV::V21) 19214 .Case("{v22}", RISCV::V22) 19215 .Case("{v23}", RISCV::V23) 19216 .Case("{v24}", RISCV::V24) 19217 .Case("{v25}", RISCV::V25) 19218 .Case("{v26}", RISCV::V26) 19219 .Case("{v27}", RISCV::V27) 19220 .Case("{v28}", RISCV::V28) 19221 .Case("{v29}", RISCV::V29) 19222 .Case("{v30}", RISCV::V30) 19223 .Case("{v31}", RISCV::V31) 19224 .Default(RISCV::NoRegister); 19225 if (VReg != RISCV::NoRegister) { 19226 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 19227 return 
std::make_pair(VReg, &RISCV::VMRegClass); 19228 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 19229 return std::make_pair(VReg, &RISCV::VRRegClass); 19230 for (const auto *RC : 19231 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 19232 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 19233 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 19234 return std::make_pair(VReg, RC); 19235 } 19236 } 19237 } 19238 } 19239 19240 std::pair<Register, const TargetRegisterClass *> Res = 19241 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 19242 19243 // If we picked one of the Zfinx register classes, remap it to the GPR class. 19244 // FIXME: When Zfinx is supported in CodeGen this will need to take the 19245 // Subtarget into account. 19246 if (Res.second == &RISCV::GPRF16RegClass || 19247 Res.second == &RISCV::GPRF32RegClass || 19248 Res.second == &RISCV::GPRPairRegClass) 19249 return std::make_pair(Res.first, &RISCV::GPRRegClass); 19250 19251 return Res; 19252} 19253 19254InlineAsm::ConstraintCode 19255RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 19256 // Currently only support length 1 constraints. 19257 if (ConstraintCode.size() == 1) { 19258 switch (ConstraintCode[0]) { 19259 case 'A': 19260 return InlineAsm::ConstraintCode::A; 19261 default: 19262 break; 19263 } 19264 } 19265 19266 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 19267} 19268 19269void RISCVTargetLowering::LowerAsmOperandForConstraint( 19270 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 19271 SelectionDAG &DAG) const { 19272 // Currently only support length 1 constraints. 19273 if (Constraint.size() == 1) { 19274 switch (Constraint[0]) { 19275 case 'I': 19276 // Validate & create a 12-bit signed immediate operand. 19277 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 19278 uint64_t CVal = C->getSExtValue(); 19279 if (isInt<12>(CVal)) 19280 Ops.push_back( 19281 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 19282 } 19283 return; 19284 case 'J': 19285 // Validate & create an integer zero operand. 19286 if (isNullConstant(Op)) 19287 Ops.push_back( 19288 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 19289 return; 19290 case 'K': 19291 // Validate & create a 5-bit unsigned immediate operand. 
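      // Illustrative example (not in the original comments): an operand such
      // as "K"(7) satisfies the isUInt<5> check below and is rewritten into a
      // target constant, whereas "K"(42) produces no operand and the constraint
      // is reported as invalid.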
19292 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 19293 uint64_t CVal = C->getZExtValue(); 19294 if (isUInt<5>(CVal)) 19295 Ops.push_back( 19296 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 19297 } 19298 return; 19299 case 'S': 19300 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 19301 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), 19302 GA->getValueType(0))); 19303 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { 19304 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), 19305 BA->getValueType(0))); 19306 } 19307 return; 19308 default: 19309 break; 19310 } 19311 } 19312 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 19313} 19314 19315Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, 19316 Instruction *Inst, 19317 AtomicOrdering Ord) const { 19318 if (Subtarget.hasStdExtZtso()) { 19319 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19320 return Builder.CreateFence(Ord); 19321 return nullptr; 19322 } 19323 19324 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19325 return Builder.CreateFence(Ord); 19326 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 19327 return Builder.CreateFence(AtomicOrdering::Release); 19328 return nullptr; 19329} 19330 19331Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 19332 Instruction *Inst, 19333 AtomicOrdering Ord) const { 19334 if (Subtarget.hasStdExtZtso()) { 19335 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 19336 return Builder.CreateFence(Ord); 19337 return nullptr; 19338 } 19339 19340 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 19341 return Builder.CreateFence(AtomicOrdering::Acquire); 19342 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) && 19343 Ord == AtomicOrdering::SequentiallyConsistent) 19344 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); 19345 return nullptr; 19346} 19347 19348TargetLowering::AtomicExpansionKind 19349RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 19350 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 19351 // point operations can't be used in an lr/sc sequence without breaking the 19352 // forward-progress guarantee. 19353 if (AI->isFloatingPointOperation() || 19354 AI->getOperation() == AtomicRMWInst::UIncWrap || 19355 AI->getOperation() == AtomicRMWInst::UDecWrap) 19356 return AtomicExpansionKind::CmpXChg; 19357 19358 // Don't expand forced atomics, we want to have __sync libcalls instead. 
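  // (Illustrative note, assuming the usual libcall lowering: with
  // +forced-atomics an i32 `atomicrmw add` ends up as a __sync_fetch_and_add_4
  // call rather than an LR/SC loop, so no IR expansion is wanted here.)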
19359 if (Subtarget.hasForcedAtomics()) 19360 return AtomicExpansionKind::None; 19361 19362 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 19363 if (Size == 8 || Size == 16) 19364 return AtomicExpansionKind::MaskedIntrinsic; 19365 return AtomicExpansionKind::None; 19366} 19367 19368static Intrinsic::ID 19369getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 19370 if (XLen == 32) { 19371 switch (BinOp) { 19372 default: 19373 llvm_unreachable("Unexpected AtomicRMW BinOp"); 19374 case AtomicRMWInst::Xchg: 19375 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 19376 case AtomicRMWInst::Add: 19377 return Intrinsic::riscv_masked_atomicrmw_add_i32; 19378 case AtomicRMWInst::Sub: 19379 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 19380 case AtomicRMWInst::Nand: 19381 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 19382 case AtomicRMWInst::Max: 19383 return Intrinsic::riscv_masked_atomicrmw_max_i32; 19384 case AtomicRMWInst::Min: 19385 return Intrinsic::riscv_masked_atomicrmw_min_i32; 19386 case AtomicRMWInst::UMax: 19387 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 19388 case AtomicRMWInst::UMin: 19389 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 19390 } 19391 } 19392 19393 if (XLen == 64) { 19394 switch (BinOp) { 19395 default: 19396 llvm_unreachable("Unexpected AtomicRMW BinOp"); 19397 case AtomicRMWInst::Xchg: 19398 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 19399 case AtomicRMWInst::Add: 19400 return Intrinsic::riscv_masked_atomicrmw_add_i64; 19401 case AtomicRMWInst::Sub: 19402 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 19403 case AtomicRMWInst::Nand: 19404 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 19405 case AtomicRMWInst::Max: 19406 return Intrinsic::riscv_masked_atomicrmw_max_i64; 19407 case AtomicRMWInst::Min: 19408 return Intrinsic::riscv_masked_atomicrmw_min_i64; 19409 case AtomicRMWInst::UMax: 19410 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 19411 case AtomicRMWInst::UMin: 19412 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 19413 } 19414 } 19415 19416 llvm_unreachable("Unexpected XLen\n"); 19417} 19418 19419Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 19420 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 19421 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 19422 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 19423 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 19424 // mask, as this produces better code than the LR/SC loop emitted by 19425 // int_riscv_masked_atomicrmw_xchg. 
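  // For example (illustrative): an i8 `atomicrmw xchg` of the constant 0 is
  // rewritten below into an `atomicrmw and` of ~Mask on the aligned word, and
  // an xchg of -1 into an `atomicrmw or` of Mask, avoiding the LR/SC loop.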
19426 if (AI->getOperation() == AtomicRMWInst::Xchg && 19427 isa<ConstantInt>(AI->getValOperand())) { 19428 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 19429 if (CVal->isZero()) 19430 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 19431 Builder.CreateNot(Mask, "Inv_Mask"), 19432 AI->getAlign(), Ord); 19433 if (CVal->isMinusOne()) 19434 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 19435 AI->getAlign(), Ord); 19436 } 19437 19438 unsigned XLen = Subtarget.getXLen(); 19439 Value *Ordering = 19440 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 19441 Type *Tys[] = {AlignedAddr->getType()}; 19442 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 19443 AI->getModule(), 19444 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 19445 19446 if (XLen == 64) { 19447 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 19448 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 19449 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 19450 } 19451 19452 Value *Result; 19453 19454 // Must pass the shift amount needed to sign extend the loaded value prior 19455 // to performing a signed comparison for min/max. ShiftAmt is the number of 19456 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 19457 // is the number of bits to left+right shift the value in order to 19458 // sign-extend. 19459 if (AI->getOperation() == AtomicRMWInst::Min || 19460 AI->getOperation() == AtomicRMWInst::Max) { 19461 const DataLayout &DL = AI->getModule()->getDataLayout(); 19462 unsigned ValWidth = 19463 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 19464 Value *SextShamt = 19465 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 19466 Result = Builder.CreateCall(LrwOpScwLoop, 19467 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 19468 } else { 19469 Result = 19470 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 19471 } 19472 19473 if (XLen == 64) 19474 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 19475 return Result; 19476} 19477 19478TargetLowering::AtomicExpansionKind 19479RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 19480 AtomicCmpXchgInst *CI) const { 19481 // Don't expand forced atomics, we want to have __sync libcalls instead. 
19482 if (Subtarget.hasForcedAtomics()) 19483 return AtomicExpansionKind::None; 19484 19485 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 19486 if (Size == 8 || Size == 16) 19487 return AtomicExpansionKind::MaskedIntrinsic; 19488 return AtomicExpansionKind::None; 19489} 19490 19491Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 19492 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 19493 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 19494 unsigned XLen = Subtarget.getXLen(); 19495 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 19496 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 19497 if (XLen == 64) { 19498 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 19499 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 19500 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 19501 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 19502 } 19503 Type *Tys[] = {AlignedAddr->getType()}; 19504 Function *MaskedCmpXchg = 19505 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 19506 Value *Result = Builder.CreateCall( 19507 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 19508 if (XLen == 64) 19509 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 19510 return Result; 19511} 19512 19513bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, 19514 EVT DataVT) const { 19515 // We have indexed loads for all legal index types. Indices are always 19516 // zero extended 19517 return Extend.getOpcode() == ISD::ZERO_EXTEND && 19518 isTypeLegal(Extend.getValueType()) && 19519 isTypeLegal(Extend.getOperand(0).getValueType()); 19520} 19521 19522bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, 19523 EVT VT) const { 19524 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) 19525 return false; 19526 19527 switch (FPVT.getSimpleVT().SimpleTy) { 19528 case MVT::f16: 19529 return Subtarget.hasStdExtZfhmin(); 19530 case MVT::f32: 19531 return Subtarget.hasStdExtF(); 19532 case MVT::f64: 19533 return Subtarget.hasStdExtD(); 19534 default: 19535 return false; 19536 } 19537} 19538 19539unsigned RISCVTargetLowering::getJumpTableEncoding() const { 19540 // If we are using the small code model, we can reduce size of jump table 19541 // entry to 4 bytes. 19542 if (Subtarget.is64Bit() && !isPositionIndependent() && 19543 getTargetMachine().getCodeModel() == CodeModel::Small) { 19544 return MachineJumpTableInfo::EK_Custom32; 19545 } 19546 return TargetLowering::getJumpTableEncoding(); 19547} 19548 19549const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( 19550 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, 19551 unsigned uid, MCContext &Ctx) const { 19552 assert(Subtarget.is64Bit() && !isPositionIndependent() && 19553 getTargetMachine().getCodeModel() == CodeModel::Small); 19554 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); 19555} 19556 19557bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { 19558 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 19559 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 19560 // a power of two as well. 19561 // FIXME: This doesn't work for zve32, but that's already broken 19562 // elsewhere for the same reason. 
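  // Illustrative arithmetic: with VLEN = 128 and RVVBitsPerBlock = 64, vscale
  // is 128 / 64 = 2; since VLEN is architecturally a power of two >= 64 here,
  // vscale is always a power of two as well.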
19563 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); 19564 static_assert(RISCV::RVVBitsPerBlock == 64, 19565 "RVVBitsPerBlock changed, audit needed"); 19566 return true; 19567} 19568 19569bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, 19570 SDValue &Offset, 19571 ISD::MemIndexedMode &AM, 19572 SelectionDAG &DAG) const { 19573 // Target does not support indexed loads. 19574 if (!Subtarget.hasVendorXTHeadMemIdx()) 19575 return false; 19576 19577 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) 19578 return false; 19579 19580 Base = Op->getOperand(0); 19581 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { 19582 int64_t RHSC = RHS->getSExtValue(); 19583 if (Op->getOpcode() == ISD::SUB) 19584 RHSC = -(uint64_t)RHSC; 19585 19586 // The constants that can be encoded in the THeadMemIdx instructions 19587 // are of the form (sign_extend(imm5) << imm2). 19588 bool isLegalIndexedOffset = false; 19589 for (unsigned i = 0; i < 4; i++) 19590 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { 19591 isLegalIndexedOffset = true; 19592 break; 19593 } 19594 19595 if (!isLegalIndexedOffset) 19596 return false; 19597 19598 Offset = Op->getOperand(1); 19599 return true; 19600 } 19601 19602 return false; 19603} 19604 19605bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 19606 SDValue &Offset, 19607 ISD::MemIndexedMode &AM, 19608 SelectionDAG &DAG) const { 19609 EVT VT; 19610 SDValue Ptr; 19611 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 19612 VT = LD->getMemoryVT(); 19613 Ptr = LD->getBasePtr(); 19614 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 19615 VT = ST->getMemoryVT(); 19616 Ptr = ST->getBasePtr(); 19617 } else 19618 return false; 19619 19620 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG)) 19621 return false; 19622 19623 AM = ISD::PRE_INC; 19624 return true; 19625} 19626 19627bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 19628 SDValue &Base, 19629 SDValue &Offset, 19630 ISD::MemIndexedMode &AM, 19631 SelectionDAG &DAG) const { 19632 EVT VT; 19633 SDValue Ptr; 19634 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 19635 VT = LD->getMemoryVT(); 19636 Ptr = LD->getBasePtr(); 19637 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 19638 VT = ST->getMemoryVT(); 19639 Ptr = ST->getBasePtr(); 19640 } else 19641 return false; 19642 19643 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) 19644 return false; 19645 // Post-indexing updates the base, so it's not a valid transform 19646 // if that's not the same as the load's pointer. 19647 if (Ptr != Base) 19648 return false; 19649 19650 AM = ISD::POST_INC; 19651 return true; 19652} 19653 19654bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 19655 EVT VT) const { 19656 EVT SVT = VT.getScalarType(); 19657 19658 if (!SVT.isSimple()) 19659 return false; 19660 19661 switch (SVT.getSimpleVT().SimpleTy) { 19662 case MVT::f16: 19663 return VT.isVector() ? Subtarget.hasVInstructionsF16() 19664 : Subtarget.hasStdExtZfhOrZhinx(); 19665 case MVT::f32: 19666 return Subtarget.hasStdExtFOrZfinx(); 19667 case MVT::f64: 19668 return Subtarget.hasStdExtDOrZdinx(); 19669 default: 19670 break; 19671 } 19672 19673 return false; 19674} 19675 19676ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { 19677 // Zacas will use amocas.w which does not require extension. 19678 return Subtarget.hasStdExtZacas() ? 
ISD::ANY_EXTEND : ISD::SIGN_EXTEND; 19679} 19680 19681Register RISCVTargetLowering::getExceptionPointerRegister( 19682 const Constant *PersonalityFn) const { 19683 return RISCV::X10; 19684} 19685 19686Register RISCVTargetLowering::getExceptionSelectorRegister( 19687 const Constant *PersonalityFn) const { 19688 return RISCV::X11; 19689} 19690 19691bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 19692 // Return false to suppress the unnecessary extensions if the LibCall 19693 // arguments or return value is a float narrower than XLEN on a soft FP ABI. 19694 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && 19695 Type.getSizeInBits() < Subtarget.getXLen())) 19696 return false; 19697 19698 return true; 19699} 19700 19701bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 19702 if (Subtarget.is64Bit() && Type == MVT::i32) 19703 return true; 19704 19705 return IsSigned; 19706} 19707 19708bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 19709 SDValue C) const { 19710 // Check integral scalar types. 19711 const bool HasExtMOrZmmul = 19712 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); 19713 if (!VT.isScalarInteger()) 19714 return false; 19715 19716 // Omit the optimization if the sub target has the M extension and the data 19717 // size exceeds XLen. 19718 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) 19719 return false; 19720 19721 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 19722 // Break the MUL to a SLLI and an ADD/SUB. 19723 const APInt &Imm = ConstNode->getAPIntValue(); 19724 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 19725 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 19726 return true; 19727 19728 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 19729 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && 19730 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 19731 (Imm - 8).isPowerOf2())) 19732 return true; 19733 19734 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 19735 // a pair of LUI/ADDI. 19736 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 && 19737 ConstNode->hasOneUse()) { 19738 APInt ImmS = Imm.ashr(Imm.countr_zero()); 19739 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 19740 (1 - ImmS).isPowerOf2()) 19741 return true; 19742 } 19743 } 19744 19745 return false; 19746} 19747 19748bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, 19749 SDValue ConstNode) const { 19750 // Let the DAGCombiner decide for vectors. 19751 EVT VT = AddNode.getValueType(); 19752 if (VT.isVector()) 19753 return true; 19754 19755 // Let the DAGCombiner decide for larger types. 19756 if (VT.getScalarSizeInBits() > Subtarget.getXLen()) 19757 return true; 19758 19759 // It is worse if c1 is simm12 while c1*c2 is not. 19760 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); 19761 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); 19762 const APInt &C1 = C1Node->getAPIntValue(); 19763 const APInt &C2 = C2Node->getAPIntValue(); 19764 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) 19765 return false; 19766 19767 // Default to true and let the DAGCombiner decide. 
19768 return true; 19769} 19770 19771bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 19772 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 19773 unsigned *Fast) const { 19774 if (!VT.isVector()) { 19775 if (Fast) 19776 *Fast = Subtarget.hasFastUnalignedAccess() || 19777 Subtarget.enableUnalignedScalarMem(); 19778 return Subtarget.hasFastUnalignedAccess() || 19779 Subtarget.enableUnalignedScalarMem(); 19780 } 19781 19782 // All vector implementations must support element alignment 19783 EVT ElemVT = VT.getVectorElementType(); 19784 if (Alignment >= ElemVT.getStoreSize()) { 19785 if (Fast) 19786 *Fast = 1; 19787 return true; 19788 } 19789 19790 // Note: We lower an unmasked unaligned vector access to an equally sized 19791 // e8 element type access. Given this, we effectively support all unmasked 19792 // misaligned accesses. TODO: Work through the codegen implications of 19793 // allowing such accesses to be formed, and considered fast. 19794 if (Fast) 19795 *Fast = Subtarget.hasFastUnalignedAccess(); 19796 return Subtarget.hasFastUnalignedAccess(); 19797} 19798 19799 19800EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, 19801 const AttributeList &FuncAttributes) const { 19802 if (!Subtarget.hasVInstructions()) 19803 return MVT::Other; 19804 19805 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) 19806 return MVT::Other; 19807 19808 // We use LMUL1 memory operations here for a non-obvious reason. Our caller 19809 // has an expansion threshold, and we want the number of hardware memory 19810 // operations to correspond roughly to that threshold. LMUL>1 operations 19811 // are typically expanded linearly internally, and thus correspond to more 19812 // than one actual memory operation. Note that store merging and load 19813 // combining will typically form larger LMUL operations from the LMUL1 19814 // operations emitted here, and that's okay because combining isn't 19815 // introducing new memory operations; it's just merging existing ones. 19816 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; 19817 if (Op.size() < MinVLenInBytes) 19818 // TODO: Figure out short memops. For the moment, do the default thing 19819 // which ends up using scalar sequences. 19820 return MVT::Other; 19821 19822 // Prefer i8 for non-zero memset as it allows us to avoid materializing 19823 // a large scalar constant and instead use vmv.v.x/i to do the 19824 // broadcast. For everything else, prefer ELenVT to minimize VL and thus 19825 // maximize the chance we can encode the size in the vsetvli. 19826 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen()); 19827 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; 19828 19829 // Do we have sufficient alignment for our preferred VT? If not, revert 19830 // to largest size allowed by our alignment criteria. 
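  // Illustrative example (not in the original comments): with ELEN = 64 but a
  // destination known to be only 2-byte aligned, PreferredVT is reduced below
  // from i64 to i16, so a VLEN >= 128 target would use e.g. v8i16 rather than
  // v2i64 for the expansion.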
19831 if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { 19832 Align RequiredAlign(PreferredVT.getStoreSize()); 19833 if (Op.isFixedDstAlign()) 19834 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); 19835 if (Op.isMemcpy()) 19836 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign()); 19837 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8); 19838 } 19839 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize()); 19840} 19841 19842bool RISCVTargetLowering::splitValueIntoRegisterParts( 19843 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 19844 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 19845 bool IsABIRegCopy = CC.has_value(); 19846 EVT ValueVT = Val.getValueType(); 19847 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 19848 PartVT == MVT::f32) { 19849 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float 19850 // nan, and cast to f32. 19851 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); 19852 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); 19853 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, 19854 DAG.getConstant(0xFFFF0000, DL, MVT::i32)); 19855 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 19856 Parts[0] = Val; 19857 return true; 19858 } 19859 19860 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 19861 LLVMContext &Context = *DAG.getContext(); 19862 EVT ValueEltVT = ValueVT.getVectorElementType(); 19863 EVT PartEltVT = PartVT.getVectorElementType(); 19864 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); 19865 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); 19866 if (PartVTBitSize % ValueVTBitSize == 0) { 19867 assert(PartVTBitSize >= ValueVTBitSize); 19868 // If the element types are different, bitcast to the same element type of 19869 // PartVT first. 19870 // Give an example here, we want copy a <vscale x 1 x i8> value to 19871 // <vscale x 4 x i16>. 19872 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert 19873 // subvector, then we can bitcast to <vscale x 4 x i16>. 19874 if (ValueEltVT != PartEltVT) { 19875 if (PartVTBitSize > ValueVTBitSize) { 19876 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); 19877 assert(Count != 0 && "The number of element should not be zero."); 19878 EVT SameEltTypeVT = 19879 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); 19880 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT, 19881 DAG.getUNDEF(SameEltTypeVT), Val, 19882 DAG.getVectorIdxConstant(0, DL)); 19883 } 19884 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 19885 } else { 19886 Val = 19887 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), 19888 Val, DAG.getVectorIdxConstant(0, DL)); 19889 } 19890 Parts[0] = Val; 19891 return true; 19892 } 19893 } 19894 return false; 19895} 19896 19897SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( 19898 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 19899 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 19900 bool IsABIRegCopy = CC.has_value(); 19901 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && 19902 PartVT == MVT::f32) { 19903 SDValue Val = Parts[0]; 19904 19905 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. 
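    // (Illustrative note: this is the inverse of the NaN-boxing performed in
    // splitValueIntoRegisterParts above; the all-ones upper 16 bits added there
    // are simply discarded by the truncate.)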
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert the value to the element
      // type of PartVT first.
      // For example, to copy a <vscale x 1 x i8> value out of a
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8>, then extract the <vscale x 1 x i8> subvector.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing the splat operand of a zero_extend or sign_extend can prevent
  // the extend from being matched to a widening instruction in some
  // situations, so keep those extends in vector form.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}

Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
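  // Illustrative note (not in the original comments): the EMUL * NFIELDS <= 8
  // check below enforces the architectural limit on segment register groups,
  // e.g. with Factor = 4 each field may occupy at most an LMUL-2 register
  // group.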
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  }

  // Need to make sure that EMUL * NFIELDS <= 8
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}

bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.hasFastUnalignedAccess() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}

static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};

/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g.
Lower an interleaved load (Factor = 2): 20039/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr 20040/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements 20041/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements 20042/// 20043/// Into: 20044/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( 20045/// %ptr, i64 4) 20046/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 20047/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 20048bool RISCVTargetLowering::lowerInterleavedLoad( 20049 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, 20050 ArrayRef<unsigned> Indices, unsigned Factor) const { 20051 IRBuilder<> Builder(LI); 20052 20053 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); 20054 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), 20055 LI->getPointerAddressSpace(), 20056 LI->getModule()->getDataLayout())) 20057 return false; 20058 20059 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 20060 20061 Function *VlsegNFunc = 20062 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2], 20063 {VTy, LI->getPointerOperandType(), XLenTy}); 20064 20065 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 20066 20067 CallInst *VlsegN = 20068 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); 20069 20070 for (unsigned i = 0; i < Shuffles.size(); i++) { 20071 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); 20072 Shuffles[i]->replaceAllUsesWith(SubVec); 20073 } 20074 20075 return true; 20076} 20077 20078static const Intrinsic::ID FixedVssegIntrIds[] = { 20079 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, 20080 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, 20081 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, 20082 Intrinsic::riscv_seg8_store}; 20083 20084/// Lower an interleaved store into a vssegN intrinsic. 20085/// 20086/// E.g. Lower an interleaved store (Factor = 3): 20087/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 20088/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 20089/// store <12 x i32> %i.vec, <12 x i32>* %ptr 20090/// 20091/// Into: 20092/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> 20093/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> 20094/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> 20095/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, 20096/// %ptr, i32 4) 20097/// 20098/// Note that the new shufflevectors will be removed and we'll only generate one 20099/// vsseg3 instruction in CodeGen. 
20100bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, 20101 ShuffleVectorInst *SVI, 20102 unsigned Factor) const { 20103 IRBuilder<> Builder(SI); 20104 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); 20105 // Given SVI : <n*factor x ty>, then VTy : <n x ty> 20106 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), 20107 ShuffleVTy->getNumElements() / Factor); 20108 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), 20109 SI->getPointerAddressSpace(), 20110 SI->getModule()->getDataLayout())) 20111 return false; 20112 20113 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 20114 20115 Function *VssegNFunc = 20116 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2], 20117 {VTy, SI->getPointerOperandType(), XLenTy}); 20118 20119 auto Mask = SVI->getShuffleMask(); 20120 SmallVector<Value *, 10> Ops; 20121 20122 for (unsigned i = 0; i < Factor; i++) { 20123 Value *Shuffle = Builder.CreateShuffleVector( 20124 SVI->getOperand(0), SVI->getOperand(1), 20125 createSequentialMask(Mask[i], VTy->getNumElements(), 0)); 20126 Ops.push_back(Shuffle); 20127 } 20128 // This VL should be OK (should be executable in one vsseg instruction, 20129 // potentially under larger LMULs) because we checked that the fixed vector 20130 // type fits in isLegalInterleavedAccessType 20131 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); 20132 Ops.append({SI->getPointerOperand(), VL}); 20133 20134 Builder.CreateCall(VssegNFunc, Ops); 20135 20136 return true; 20137} 20138 20139bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, 20140 LoadInst *LI) const { 20141 assert(LI->isSimple()); 20142 IRBuilder<> Builder(LI); 20143 20144 // Only deinterleave2 supported at present. 
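  // Illustrative example (not in the original comments): a call such as
  //   {<vscale x 4 x i32>, <vscale x 4 x i32>}
  //       @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %v)
  // whose operand is a plain load of %v is rewritten below into a single
  // @llvm.riscv.vlseg2 call, i.e. one vlseg2e32.v instruction.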
20145 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2) 20146 return false; 20147 20148 unsigned Factor = 2; 20149 20150 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType()); 20151 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0)); 20152 20153 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), 20154 LI->getPointerAddressSpace(), 20155 LI->getModule()->getDataLayout())) 20156 return false; 20157 20158 Function *VlsegNFunc; 20159 Value *VL; 20160 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); 20161 SmallVector<Value *, 10> Ops; 20162 20163 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 20164 VlsegNFunc = Intrinsic::getDeclaration( 20165 LI->getModule(), FixedVlsegIntrIds[Factor - 2], 20166 {ResVTy, LI->getPointerOperandType(), XLenTy}); 20167 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 20168 } else { 20169 static const Intrinsic::ID IntrIds[] = { 20170 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 20171 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 20172 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 20173 Intrinsic::riscv_vlseg8}; 20174 20175 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], 20176 {ResVTy, XLenTy}); 20177 VL = Constant::getAllOnesValue(XLenTy); 20178 Ops.append(Factor, PoisonValue::get(ResVTy)); 20179 } 20180 20181 Ops.append({LI->getPointerOperand(), VL}); 20182 20183 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); 20184 DI->replaceAllUsesWith(Vlseg); 20185 20186 return true; 20187} 20188 20189bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, 20190 StoreInst *SI) const { 20191 assert(SI->isSimple()); 20192 IRBuilder<> Builder(SI); 20193 20194 // Only interleave2 supported at present. 
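  // Illustrative example (not in the original comments): a store of
  //   <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(
  //       <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  // is rewritten below into a single @llvm.riscv.vsseg2 call (vsseg2e32.v).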
20195 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2) 20196 return false; 20197 20198 unsigned Factor = 2; 20199 20200 VectorType *VTy = cast<VectorType>(II->getType()); 20201 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType()); 20202 20203 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), 20204 SI->getPointerAddressSpace(), 20205 SI->getModule()->getDataLayout())) 20206 return false; 20207 20208 Function *VssegNFunc; 20209 Value *VL; 20210 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); 20211 20212 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { 20213 VssegNFunc = Intrinsic::getDeclaration( 20214 SI->getModule(), FixedVssegIntrIds[Factor - 2], 20215 {InVTy, SI->getPointerOperandType(), XLenTy}); 20216 VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); 20217 } else { 20218 static const Intrinsic::ID IntrIds[] = { 20219 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, 20220 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, 20221 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, 20222 Intrinsic::riscv_vsseg8}; 20223 20224 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], 20225 {InVTy, XLenTy}); 20226 VL = Constant::getAllOnesValue(XLenTy); 20227 } 20228 20229 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1), 20230 SI->getPointerOperand(), VL}); 20231 20232 return true; 20233} 20234 20235MachineInstr * 20236RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, 20237 MachineBasicBlock::instr_iterator &MBBI, 20238 const TargetInstrInfo *TII) const { 20239 assert(MBBI->isCall() && MBBI->getCFIType() && 20240 "Invalid call instruction for a KCFI check"); 20241 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, 20242 MBBI->getOpcode())); 20243 20244 MachineOperand &Target = MBBI->getOperand(0); 20245 Target.setIsRenamable(false); 20246 20247 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK)) 20248 .addReg(Target.getReg()) 20249 .addImm(MBBI->getCFIType()) 20250 .getInstr(); 20251} 20252 20253#define GET_REGISTER_MATCHER 20254#include "RISCVGenAsmMatcher.inc" 20255 20256Register 20257RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 20258 const MachineFunction &MF) const { 20259 Register Reg = MatchRegisterAltName(RegName); 20260 if (Reg == RISCV::NoRegister) 20261 Reg = MatchRegisterName(RegName); 20262 if (Reg == RISCV::NoRegister) 20263 report_fatal_error( 20264 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 20265 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 20266 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 20267 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 20268 StringRef(RegName) + "\".")); 20269 return Reg; 20270} 20271 20272MachineMemOperand::Flags 20273RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { 20274 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal); 20275 20276 if (NontemporalInfo == nullptr) 20277 return MachineMemOperand::MONone; 20278 20279 // 1 for default value work as __RISCV_NTLH_ALL 20280 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE 20281 // 3 -> __RISCV_NTLH_ALL_PRIVATE 20282 // 4 -> __RISCV_NTLH_INNERMOST_SHARED 20283 // 5 -> __RISCV_NTLH_ALL 20284 int NontemporalLevel = 5; 20285 const MDNode *RISCVNontemporalInfo = 20286 I.getMetadata("riscv-nontemporal-domain"); 20287 if (RISCVNontemporalInfo != nullptr) 20288 NontemporalLevel = 
20289 cast<ConstantInt>( 20290 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0)) 20291 ->getValue()) 20292 ->getZExtValue(); 20293 20294 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) && 20295 "RISC-V target doesn't support this non-temporal domain."); 20296 20297 NontemporalLevel -= 2; 20298 MachineMemOperand::Flags Flags = MachineMemOperand::MONone; 20299 if (NontemporalLevel & 0b1) 20300 Flags |= MONontemporalBit0; 20301 if (NontemporalLevel & 0b10) 20302 Flags |= MONontemporalBit1; 20303 20304 return Flags; 20305} 20306 20307MachineMemOperand::Flags 20308RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { 20309 20310 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); 20311 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; 20312 TargetFlags |= (NodeFlags & MONontemporalBit0); 20313 TargetFlags |= (NodeFlags & MONontemporalBit1); 20314 20315 return TargetFlags; 20316} 20317 20318bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( 20319 const MemSDNode &NodeX, const MemSDNode &NodeY) const { 20320 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); 20321} 20322 20323bool RISCVTargetLowering::isCtpopFast(EVT VT) const { 20324 if (VT.isScalableVector()) 20325 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); 20326 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) 20327 return true; 20328 return Subtarget.hasStdExtZbb() && 20329 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); 20330} 20331 20332unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, 20333 ISD::CondCode Cond) const { 20334 return isCtpopFast(VT) ? 0 : 1; 20335} 20336 20337bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { 20338 20339 // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and 20340 // G_XOR. 20341 unsigned Op = Inst.getOpcode(); 20342 if (Op == Instruction::Add || Op == Instruction::Sub || 20343 Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) 20344 return false; 20345 20346 if (Inst.getType()->isScalableTy()) 20347 return true; 20348 20349 for (unsigned i = 0; i < Inst.getNumOperands(); ++i) 20350 if (Inst.getOperand(i)->getType()->isScalableTy() && 20351 !isa<ReturnInst>(&Inst)) 20352 return true; 20353 20354 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { 20355 if (AI->getAllocatedType()->isScalableTy()) 20356 return true; 20357 } 20358 20359 return false; 20360} 20361 20362SDValue 20363RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 20364 SelectionDAG &DAG, 20365 SmallVectorImpl<SDNode *> &Created) const { 20366 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 20367 if (isIntDivCheap(N->getValueType(0), Attr)) 20368 return SDValue(N, 0); // Lower SDIV as SDIV 20369 20370 // Only perform this transform if short forward branch opt is supported. 20371 if (!Subtarget.hasShortForwardBranchOpt()) 20372 return SDValue(); 20373 EVT VT = N->getValueType(0); 20374 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) 20375 return SDValue(); 20376 20377 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. 
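  // Illustrative arithmetic: a divisor of 1024 needs the constant 1023, which
  // fits in a 12-bit immediate, while a divisor of 4096 would need 4095 and is
  // rejected below.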
20378 if (Divisor.sgt(2048) || Divisor.slt(-2048)) 20379 return SDValue(); 20380 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created); 20381} 20382 20383bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( 20384 EVT VT, const APInt &AndMask) const { 20385 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) 20386 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); 20387 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); 20388} 20389 20390unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { 20391 return Subtarget.getMinimumJumpTableEntries(); 20392} 20393 20394namespace llvm::RISCVVIntrinsicsTable { 20395 20396#define GET_RISCVVIntrinsicsTable_IMPL 20397#include "RISCVGenSearchableTables.inc" 20398 20399} // namespace llvm::RISCVVIntrinsicsTable 20400