// ARMInstrNEON.td revision 266715 (provenance note from the source viewer).
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand. Printed via
// printNEONModImmOperand; has no ParserMatchClass, so it is only used where
// the assembler does not need to match it directly.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-style modified immediates, one AsmOperandClass per element size so
// the assembler can diagnose out-of-range immediates per data type.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// VMOV-style 32-bit modified immediates, plus the negated variant.
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate, printed with the FP-immediate printer.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf predicates bound the index by
// element count per 64-bit D register: 8 byte lanes, 4 half-word lanes,
// 2 word lanes.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
// NOTE(review): the comment previously said "three", copy-pasted from the
// block above; this operand is the four-register variant (see the "Four"
// names and printVectorListFourSpaced below).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
// NOTE(review): the previous comment omitted the "all lanes" aspect that
// distinguishes this operand from VecListDPairSpaced above.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                        "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists: each is an Operand whose MI representation is a
// (register, lane-index) pair, rendered with addVecListIndexedOperands.

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Alignment-predicated load/store pattern fragments. Each fragment matches a
// plain load/store whose known alignment satisfies the stated relation, so
// instruction selection can pick a form legal for that alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compare type profiles: the two-operand form constrains both sources
// to the same type; the compare-with-zero form leaves types unconstrained.
def SDTARMVCMP  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

// ARMISD vector-compare nodes; the "z" suffixed nodes are the
// compare-against-zero variants, "u" the unsigned comparisons.
def NEONvceq  : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge  : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHX   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain shifts (VSHL/VSHR/VSHLL/VSHRN families).
def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// VRSHR/VRSHRN family.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// VQSHL/VQSHRN family.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// VQRSHRN family.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert (VSLI/VSRI).
def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction: i32 result, vector source, i32 lane number.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Modified-immediate moves: vector result from an i32-encoded immediate.
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with modified immediate: vector op vector-with-imm.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: all three inputs and the result share one vector type.
def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// Single-operand shuffles (VREV*).
def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (VZIP/VUZP/VTRN): two inputs, two outputs, one type.
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Long multiplies: integer result and sources; only the two sources are
// constrained to the same type (the result is wider).
def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Matches a VMOVIMM whose decoded modified immediate is a 32-bit-element
// splat of zero.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose decoded modified immediate is an 8-bit-element
// splat of 0xff, i.e. a vector with all bits set.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Plain / writeback (post-increment) load pseudos for a Q register.
// "WBfixed" takes no offset operand (fixed post-increment); "WBregister"
// takes a register offset.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// Same shapes for a QQ (4 x D) register destination.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// QQQQ (8 x D) destinations additionally tie a $src input to $dst, so the
// registers not written by this half of a split load are preserved.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

// NOTE(review): this 'let' region is not closed within this section of the
// file; its closing brace is further down, outside this chunk.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8">;
def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
def VLD1d64 : VLD1D<{1,1,0,?}, "64">;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8   : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16  : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32  : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111; // non-writeback form
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but with the 32-bit-only addressing mode (addrmode6oneL32).
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111; // non-writeback form
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register lane load pseudo carrying the isel pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// f32 lane inserts reuse the i32 lane-load instructions.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  // NOTE(review): both encoding bits are driven from Rn{4} here, unlike the
  // non-writeback VLD1LNd32 which uses Inst{5-4} = Rn{5-4} — verify intended.
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  :
                      VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111; // non-writeback form
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN" ;
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// f32 splats reuse the i32 dup-load instruction.
def : Pat<(v2f32
           (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

// Q-register (D-pair) variant of VLD1DUP.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr,
                 QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// VST1 of a D-register pair.
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // non-writeback form
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
// VST2 with address register writeback: _fixed (post-increment by access
// size, "$Rn!") and _register (post-increment by $Rm) forms.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the i32 lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;

// VST4LN : Vector Store (single
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "", []> {
  let Rm = 0b1111; // No writeback; Rm=0b1111 encodes the no-update form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

// Use vld1/vst1 for unaligned f64 load / store
// (sub-word alignments are little-endian only; big-endian uses the
// 64-bit element form for any non-word alignment).
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
2268def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2269 (VLD1q64 addrmode6:$addr)>; 2270def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2271 (VST1q64 addrmode6:$addr, QPR:$value)>; 2272def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2273 (VLD1q32 addrmode6:$addr)>; 2274def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2275 (VST1q32 addrmode6:$addr, QPR:$value)>; 2276def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2277 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; 2278def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2279 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2280def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2281 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; 2282def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2283 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2284 2285//===----------------------------------------------------------------------===// 2286// NEON pattern fragments 2287//===----------------------------------------------------------------------===// 2288 2289// Extract D sub-registers of Q registers. 
2290def DSubReg_i8_reg : SDNodeXForm<imm, [{ 2291 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2292 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); 2293}]>; 2294def DSubReg_i16_reg : SDNodeXForm<imm, [{ 2295 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2296 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); 2297}]>; 2298def DSubReg_i32_reg : SDNodeXForm<imm, [{ 2299 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2300 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); 2301}]>; 2302def DSubReg_f64_reg : SDNodeXForm<imm, [{ 2303 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2304 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); 2305}]>; 2306 2307// Extract S sub-registers of Q/D registers. 2308def SSubReg_f32_reg : SDNodeXForm<imm, [{ 2309 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 2310 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); 2311}]>; 2312 2313// Translate lane numbers from Q registers to D subregs. 2314def SubReg_i8_lane : SDNodeXForm<imm, [{ 2315 return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32); 2316}]>; 2317def SubReg_i16_lane : SDNodeXForm<imm, [{ 2318 return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32); 2319}]>; 2320def SubReg_i32_lane : SDNodeXForm<imm, [{ 2321 return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32); 2322}]>; 2323 2324//===----------------------------------------------------------------------===// 2325// Instruction Classes 2326//===----------------------------------------------------------------------===// 2327 2328// Basic 2-register operations: double- and quad-register. 
2329class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2330 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2331 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2332 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2333 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2334 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2335class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2336 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2337 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2338 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2339 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2340 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2341 2342// Basic 2-register intrinsics, both double- and quad-register. 2343class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2344 bits<2> op17_16, bits<5> op11_7, bit op4, 2345 InstrItinClass itin, string OpcodeStr, string Dt, 2346 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2347 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2348 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2349 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2350class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2351 bits<2> op17_16, bits<5> op11_7, bit op4, 2352 InstrItinClass itin, string OpcodeStr, string Dt, 2353 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2354 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2355 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2356 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2357 2358// Same as above, but not predicated. 
2359class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, 2360 InstrItinClass itin, string OpcodeStr, string Dt, 2361 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2362 : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), 2363 itin, OpcodeStr, Dt, ResTy, OpTy, 2364 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2365 2366class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, 2367 InstrItinClass itin, string OpcodeStr, string Dt, 2368 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2369 : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), 2370 itin, OpcodeStr, Dt, ResTy, OpTy, 2371 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2372 2373// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 2374class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2375 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2376 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2377 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), 2378 itin, OpcodeStr, Dt, ResTy, OpTy, 2379 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2380 2381// Same as N2VQIntXnp but with Vd as a src register. 2382class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2383 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2384 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2385 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, 2386 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), 2387 itin, OpcodeStr, Dt, ResTy, OpTy, 2388 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { 2389 let Constraints = "$src = $Vd"; 2390} 2391 2392// Narrow 2-register operations. 
2393class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2394 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2395 InstrItinClass itin, string OpcodeStr, string Dt, 2396 ValueType TyD, ValueType TyQ, SDNode OpNode> 2397 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2398 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2399 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2400 2401// Narrow 2-register intrinsics. 2402class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2403 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2404 InstrItinClass itin, string OpcodeStr, string Dt, 2405 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2406 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2407 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2408 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2409 2410// Long 2-register operations (currently only used for VMOVL). 2411class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2412 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2413 InstrItinClass itin, string OpcodeStr, string Dt, 2414 ValueType TyQ, ValueType TyD, SDNode OpNode> 2415 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2416 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2417 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2418 2419// Long 2-register intrinsics. 2420class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2421 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2422 InstrItinClass itin, string OpcodeStr, string Dt, 2423 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2424 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2425 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2426 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2427 2428// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
// Shuffles write both registers, so both appear in outs and are tied
// to the corresponding source operands.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register operation with a scalar (by-lane) second operand, 32-bit lanes.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Quad-register variants of the above.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// 3-register intrinsic, double-register, with explicit Format f.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Same, but using the non-predicated N3Vnp encoding.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// 3-register intrinsic with a scalar (by-lane) second operand, 32-bit lanes.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same as N3VDInt, but source operands are in the order $Vm, $Vn.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Quad-register variants.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
          Dt, ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // $src is both read and written, so tie it to the destination.
  let Constraints = "$src = $Vd";
}

// Quad-register intrinsic with a scalar (by-lane) second operand,
// 32-bit lanes.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same as N3VQInt, but source operands are in the order $Vm, $Vn.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// Multiply-Add/Sub with a scalar (by-lane) multiplicand, 32-bit lanes.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

// Quad-register variants.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                                    imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                                    imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Intrinsic followed by a plain SDNode op (e.g. VABA = OpNode(src1, IntOp(...))),
// with the accumulator tied to the destination.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations: D-reg operands, Q-reg accumulator/result.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
// Long Multiply-Add/Sub with a scalar (by-lane) multiplicand, 32-bit lanes.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                           imm:$lane))))))]>;
// Long Multiply-Add/Sub with a scalar (by-lane) multiplicand, 16-bit lanes.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                           imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variant, 32-bit lanes.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// By-lane variant, 16-bit lanes (scalar register restricted to DPR_8).
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsics: Q-reg operands, D-reg result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations: D-reg operands, Q-reg result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operation with a scalar (by-lane) operand, 32-bit lanes.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                    (TyQ (OpNode (TyD DPR:$Vn),
                                 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both D-reg operands are widened with ExtOp before the Q-reg operation.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL):
// the D-reg intrinsic result is widened with ExtOp.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Long 3-register intrinsic with a scalar (by-lane) operand, 32-bit lanes.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]>;
// Same, 16-bit lanes (scalar register restricted to DPR_8).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]>;

// Wide 3-register operations: Q-reg first operand, D-reg second operand
// widened with ExtOp.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
3117//===----------------------------------------------------------------------===// 3118 3119// Abbreviations used in multiclass suffixes: 3120// Q = quarter int (8 bit) elements 3121// H = half int (16 bit) elements 3122// S = single int (32 bit) elements 3123// D = double int (64 bit) elements 3124 3125// Neon 2-register vector operations and intrinsics. 3126 3127// Neon 2-register comparisons. 3128// source operand element sizes of 8, 16 and 32 bits: 3129multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3130 bits<5> op11_7, bit op4, string opc, string Dt, 3131 string asm, SDNode OpNode> { 3132 // 64-bit vector types. 3133 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, 3134 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3135 opc, !strconcat(Dt, "8"), asm, "", 3136 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; 3137 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, 3138 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3139 opc, !strconcat(Dt, "16"), asm, "", 3140 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; 3141 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3142 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3143 opc, !strconcat(Dt, "32"), asm, "", 3144 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; 3145 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, 3146 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, 3147 opc, "f32", asm, "", 3148 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { 3149 let Inst{10} = 1; // overwrite F = 1 3150 } 3151 3152 // 128-bit vector types. 
3153 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, 3154 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3155 opc, !strconcat(Dt, "8"), asm, "", 3156 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; 3157 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, 3158 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3159 opc, !strconcat(Dt, "16"), asm, "", 3160 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; 3161 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3162 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3163 opc, !strconcat(Dt, "32"), asm, "", 3164 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; 3165 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, 3166 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, 3167 opc, "f32", asm, "", 3168 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { 3169 let Inst{10} = 1; // overwrite F = 1 3170 } 3171} 3172 3173 3174// Neon 2-register vector intrinsics, 3175// element sizes of 8, 16 and 32 bits: 3176multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3177 bits<5> op11_7, bit op4, 3178 InstrItinClass itinD, InstrItinClass itinQ, 3179 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3180 // 64-bit vector types. 3181 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3182 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3183 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3184 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; 3185 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3186 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; 3187 3188 // 128-bit vector types. 
3189 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3190 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; 3191 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3192 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; 3193 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3194 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; 3195} 3196 3197 3198// Neon Narrowing 2-register vector operations, 3199// source operand element sizes of 16, 32 and 64 bits: 3200multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3201 bits<5> op11_7, bit op6, bit op4, 3202 InstrItinClass itin, string OpcodeStr, string Dt, 3203 SDNode OpNode> { 3204 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3205 itin, OpcodeStr, !strconcat(Dt, "16"), 3206 v8i8, v8i16, OpNode>; 3207 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3208 itin, OpcodeStr, !strconcat(Dt, "32"), 3209 v4i16, v4i32, OpNode>; 3210 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3211 itin, OpcodeStr, !strconcat(Dt, "64"), 3212 v2i32, v2i64, OpNode>; 3213} 3214 3215// Neon Narrowing 2-register vector intrinsics, 3216// source operand element sizes of 16, 32 and 64 bits: 3217multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3218 bits<5> op11_7, bit op6, bit op4, 3219 InstrItinClass itin, string OpcodeStr, string Dt, 3220 SDPatternOperator IntOp> { 3221 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, 3222 itin, OpcodeStr, !strconcat(Dt, "16"), 3223 v8i8, v8i16, IntOp>; 3224 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, 3225 itin, OpcodeStr, !strconcat(Dt, "32"), 3226 v4i16, v4i32, IntOp>; 3227 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, 3228 itin, OpcodeStr, !strconcat(Dt, "64"), 3229 v2i32, v2i64, IntOp>; 3230} 3231 3232 3233// Neon Lengthening 2-register vector intrinsic (currently 
specific to VMOVL). 3234// source operand element sizes of 16, 32 and 64 bits: 3235multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, 3236 string OpcodeStr, string Dt, SDNode OpNode> { 3237 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3238 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; 3239 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3240 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3241 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, 3242 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3243} 3244 3245 3246// Neon 3-register vector operations. 3247 3248// First with only element sizes of 8, 16 and 32 bits: 3249multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3250 InstrItinClass itinD16, InstrItinClass itinD32, 3251 InstrItinClass itinQ16, InstrItinClass itinQ32, 3252 string OpcodeStr, string Dt, 3253 SDNode OpNode, bit Commutable = 0> { 3254 // 64-bit vector types. 3255 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, 3256 OpcodeStr, !strconcat(Dt, "8"), 3257 v8i8, v8i8, OpNode, Commutable>; 3258 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, 3259 OpcodeStr, !strconcat(Dt, "16"), 3260 v4i16, v4i16, OpNode, Commutable>; 3261 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, 3262 OpcodeStr, !strconcat(Dt, "32"), 3263 v2i32, v2i32, OpNode, Commutable>; 3264 3265 // 128-bit vector types. 
3266 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, 3267 OpcodeStr, !strconcat(Dt, "8"), 3268 v16i8, v16i8, OpNode, Commutable>; 3269 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, 3270 OpcodeStr, !strconcat(Dt, "16"), 3271 v8i16, v8i16, OpNode, Commutable>; 3272 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, 3273 OpcodeStr, !strconcat(Dt, "32"), 3274 v4i32, v4i32, OpNode, Commutable>; 3275} 3276 3277multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { 3278 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; 3279 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; 3280 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; 3281 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", 3282 v4i32, v2i32, ShOp>; 3283} 3284 3285// ....then also with element size 64 bits: 3286multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3287 InstrItinClass itinD, InstrItinClass itinQ, 3288 string OpcodeStr, string Dt, 3289 SDNode OpNode, bit Commutable = 0> 3290 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ, 3291 OpcodeStr, Dt, OpNode, Commutable> { 3292 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD, 3293 OpcodeStr, !strconcat(Dt, "64"), 3294 v1i64, v1i64, OpNode, Commutable>; 3295 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ, 3296 OpcodeStr, !strconcat(Dt, "64"), 3297 v2i64, v2i64, OpNode, Commutable>; 3298} 3299 3300 3301// Neon 3-register vector intrinsics. 3302 3303// First with only element sizes of 16 and 32 bits: 3304multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3305 InstrItinClass itinD16, InstrItinClass itinD32, 3306 InstrItinClass itinQ16, InstrItinClass itinQ32, 3307 string OpcodeStr, string Dt, 3308 SDPatternOperator IntOp, bit Commutable = 0> { 3309 // 64-bit vector types. 
3310 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, 3311 OpcodeStr, !strconcat(Dt, "16"), 3312 v4i16, v4i16, IntOp, Commutable>; 3313 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, 3314 OpcodeStr, !strconcat(Dt, "32"), 3315 v2i32, v2i32, IntOp, Commutable>; 3316 3317 // 128-bit vector types. 3318 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3319 OpcodeStr, !strconcat(Dt, "16"), 3320 v8i16, v8i16, IntOp, Commutable>; 3321 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3322 OpcodeStr, !strconcat(Dt, "32"), 3323 v4i32, v4i32, IntOp, Commutable>; 3324} 3325multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3326 InstrItinClass itinD16, InstrItinClass itinD32, 3327 InstrItinClass itinQ16, InstrItinClass itinQ32, 3328 string OpcodeStr, string Dt, 3329 SDPatternOperator IntOp> { 3330 // 64-bit vector types. 3331 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16, 3332 OpcodeStr, !strconcat(Dt, "16"), 3333 v4i16, v4i16, IntOp>; 3334 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32, 3335 OpcodeStr, !strconcat(Dt, "32"), 3336 v2i32, v2i32, IntOp>; 3337 3338 // 128-bit vector types. 
3339 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16, 3340 OpcodeStr, !strconcat(Dt, "16"), 3341 v8i16, v8i16, IntOp>; 3342 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32, 3343 OpcodeStr, !strconcat(Dt, "32"), 3344 v4i32, v4i32, IntOp>; 3345} 3346 3347multiclass N3VIntSL_HS<bits<4> op11_8, 3348 InstrItinClass itinD16, InstrItinClass itinD32, 3349 InstrItinClass itinQ16, InstrItinClass itinQ32, 3350 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3351 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, 3352 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; 3353 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, 3354 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; 3355 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, 3356 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; 3357 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, 3358 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; 3359} 3360 3361// ....then also with element size of 8 bits: 3362multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3363 InstrItinClass itinD16, InstrItinClass itinD32, 3364 InstrItinClass itinQ16, InstrItinClass itinQ32, 3365 string OpcodeStr, string Dt, 3366 SDPatternOperator IntOp, bit Commutable = 0> 3367 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3368 OpcodeStr, Dt, IntOp, Commutable> { 3369 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, 3370 OpcodeStr, !strconcat(Dt, "8"), 3371 v8i8, v8i8, IntOp, Commutable>; 3372 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3373 OpcodeStr, !strconcat(Dt, "8"), 3374 v16i8, v16i8, IntOp, Commutable>; 3375} 3376multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3377 InstrItinClass itinD16, InstrItinClass itinD32, 3378 InstrItinClass itinQ16, InstrItinClass itinQ32, 3379 string OpcodeStr, string Dt, 3380 SDPatternOperator IntOp> 3381 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, 
itinQ32, 3382 OpcodeStr, Dt, IntOp> { 3383 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16, 3384 OpcodeStr, !strconcat(Dt, "8"), 3385 v8i8, v8i8, IntOp>; 3386 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16, 3387 OpcodeStr, !strconcat(Dt, "8"), 3388 v16i8, v16i8, IntOp>; 3389} 3390 3391 3392// ....then also with element size of 64 bits: 3393multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3394 InstrItinClass itinD16, InstrItinClass itinD32, 3395 InstrItinClass itinQ16, InstrItinClass itinQ32, 3396 string OpcodeStr, string Dt, 3397 SDPatternOperator IntOp, bit Commutable = 0> 3398 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3399 OpcodeStr, Dt, IntOp, Commutable> { 3400 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, 3401 OpcodeStr, !strconcat(Dt, "64"), 3402 v1i64, v1i64, IntOp, Commutable>; 3403 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3404 OpcodeStr, !strconcat(Dt, "64"), 3405 v2i64, v2i64, IntOp, Commutable>; 3406} 3407multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, 3408 InstrItinClass itinD16, InstrItinClass itinD32, 3409 InstrItinClass itinQ16, InstrItinClass itinQ32, 3410 string OpcodeStr, string Dt, 3411 SDPatternOperator IntOp> 3412 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, 3413 OpcodeStr, Dt, IntOp> { 3414 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32, 3415 OpcodeStr, !strconcat(Dt, "64"), 3416 v1i64, v1i64, IntOp>; 3417 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32, 3418 OpcodeStr, !strconcat(Dt, "64"), 3419 v2i64, v2i64, IntOp>; 3420} 3421 3422// Neon Narrowing 3-register vector intrinsics, 3423// source operand element sizes of 16, 32 and 64 bits: 3424multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3425 string OpcodeStr, string Dt, 3426 SDPatternOperator IntOp, bit Commutable = 0> { 3427 def v8i8 : 
N3VNInt<op24, op23, 0b00, op11_8, op4, 3428 OpcodeStr, !strconcat(Dt, "16"), 3429 v8i8, v8i16, IntOp, Commutable>; 3430 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, 3431 OpcodeStr, !strconcat(Dt, "32"), 3432 v4i16, v4i32, IntOp, Commutable>; 3433 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, 3434 OpcodeStr, !strconcat(Dt, "64"), 3435 v2i32, v2i64, IntOp, Commutable>; 3436} 3437 3438 3439// Neon Long 3-register vector operations. 3440 3441multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3442 InstrItinClass itin16, InstrItinClass itin32, 3443 string OpcodeStr, string Dt, 3444 SDNode OpNode, bit Commutable = 0> { 3445 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, 3446 OpcodeStr, !strconcat(Dt, "8"), 3447 v8i16, v8i8, OpNode, Commutable>; 3448 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, 3449 OpcodeStr, !strconcat(Dt, "16"), 3450 v4i32, v4i16, OpNode, Commutable>; 3451 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, 3452 OpcodeStr, !strconcat(Dt, "32"), 3453 v2i64, v2i32, OpNode, Commutable>; 3454} 3455 3456multiclass N3VLSL_HS<bit op24, bits<4> op11_8, 3457 InstrItinClass itin, string OpcodeStr, string Dt, 3458 SDNode OpNode> { 3459 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, 3460 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; 3461 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, 3462 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; 3463} 3464 3465multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3466 InstrItinClass itin16, InstrItinClass itin32, 3467 string OpcodeStr, string Dt, 3468 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3469 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, 3470 OpcodeStr, !strconcat(Dt, "8"), 3471 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3472 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, 3473 OpcodeStr, !strconcat(Dt, "16"), 3474 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3475 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, 
itin32, 3476 OpcodeStr, !strconcat(Dt, "32"), 3477 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3478} 3479 3480// Neon Long 3-register vector intrinsics. 3481 3482// First with only element sizes of 16 and 32 bits: 3483multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3484 InstrItinClass itin16, InstrItinClass itin32, 3485 string OpcodeStr, string Dt, 3486 SDPatternOperator IntOp, bit Commutable = 0> { 3487 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, 3488 OpcodeStr, !strconcat(Dt, "16"), 3489 v4i32, v4i16, IntOp, Commutable>; 3490 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, 3491 OpcodeStr, !strconcat(Dt, "32"), 3492 v2i64, v2i32, IntOp, Commutable>; 3493} 3494 3495multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, 3496 InstrItinClass itin, string OpcodeStr, string Dt, 3497 SDPatternOperator IntOp> { 3498 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, 3499 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3500 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, 3501 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3502} 3503 3504// ....then also with element size of 8 bits: 3505multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3506 InstrItinClass itin16, InstrItinClass itin32, 3507 string OpcodeStr, string Dt, 3508 SDPatternOperator IntOp, bit Commutable = 0> 3509 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, 3510 IntOp, Commutable> { 3511 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, 3512 OpcodeStr, !strconcat(Dt, "8"), 3513 v8i16, v8i8, IntOp, Commutable>; 3514} 3515 3516// ....with explicit extend (VABDL). 
3517multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3518 InstrItinClass itin, string OpcodeStr, string Dt, 3519 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { 3520 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, 3521 OpcodeStr, !strconcat(Dt, "8"), 3522 v8i16, v8i8, IntOp, ExtOp, Commutable>; 3523 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, 3524 OpcodeStr, !strconcat(Dt, "16"), 3525 v4i32, v4i16, IntOp, ExtOp, Commutable>; 3526 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, 3527 OpcodeStr, !strconcat(Dt, "32"), 3528 v2i64, v2i32, IntOp, ExtOp, Commutable>; 3529} 3530 3531 3532// Neon Wide 3-register vector intrinsics, 3533// source operand element sizes of 8, 16 and 32 bits: 3534multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3535 string OpcodeStr, string Dt, 3536 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { 3537 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, 3538 OpcodeStr, !strconcat(Dt, "8"), 3539 v8i16, v8i8, OpNode, ExtOp, Commutable>; 3540 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, 3541 OpcodeStr, !strconcat(Dt, "16"), 3542 v4i32, v4i16, OpNode, ExtOp, Commutable>; 3543 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, 3544 OpcodeStr, !strconcat(Dt, "32"), 3545 v2i64, v2i32, OpNode, ExtOp, Commutable>; 3546} 3547 3548 3549// Neon Multiply-Op vector operations, 3550// element sizes of 8, 16 and 32 bits: 3551multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3552 InstrItinClass itinD16, InstrItinClass itinD32, 3553 InstrItinClass itinQ16, InstrItinClass itinQ32, 3554 string OpcodeStr, string Dt, SDNode OpNode> { 3555 // 64-bit vector types. 
3556 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, 3557 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>; 3558 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, 3559 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>; 3560 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, 3561 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>; 3562 3563 // 128-bit vector types. 3564 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, 3565 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>; 3566 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, 3567 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>; 3568 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, 3569 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; 3570} 3571 3572multiclass N3VMulOpSL_HS<bits<4> op11_8, 3573 InstrItinClass itinD16, InstrItinClass itinD32, 3574 InstrItinClass itinQ16, InstrItinClass itinQ32, 3575 string OpcodeStr, string Dt, SDNode ShOp> { 3576 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, 3577 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; 3578 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, 3579 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; 3580 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, 3581 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, 3582 mul, ShOp>; 3583 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, 3584 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, 3585 mul, ShOp>; 3586} 3587 3588// Neon Intrinsic-Op vector operations, 3589// element sizes of 8, 16 and 32 bits: 3590multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3591 InstrItinClass itinD, InstrItinClass itinQ, 3592 string OpcodeStr, string Dt, SDPatternOperator IntOp, 3593 SDNode OpNode> { 3594 // 64-bit vector types. 
3595 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, 3596 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; 3597 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, 3598 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; 3599 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, 3600 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; 3601 3602 // 128-bit vector types. 3603 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, 3604 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; 3605 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, 3606 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; 3607 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, 3608 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; 3609} 3610 3611// Neon 3-argument intrinsics, 3612// element sizes of 8, 16 and 32 bits: 3613multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3614 InstrItinClass itinD, InstrItinClass itinQ, 3615 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3616 // 64-bit vector types. 3617 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, 3618 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; 3619 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, 3620 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; 3621 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, 3622 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; 3623 3624 // 128-bit vector types. 
3625 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, 3626 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; 3627 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, 3628 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; 3629 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, 3630 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; 3631} 3632 3633 3634// Neon Long Multiply-Op vector operations, 3635// element sizes of 8, 16 and 32 bits: 3636multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3637 InstrItinClass itin16, InstrItinClass itin32, 3638 string OpcodeStr, string Dt, SDNode MulOp, 3639 SDNode OpNode> { 3640 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, 3641 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; 3642 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, 3643 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; 3644 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, 3645 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3646} 3647 3648multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, 3649 string Dt, SDNode MulOp, SDNode OpNode> { 3650 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, 3651 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; 3652 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, 3653 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; 3654} 3655 3656 3657// Neon Long 3-argument intrinsics. 
3658 3659// First with only element sizes of 16 and 32 bits: 3660multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3661 InstrItinClass itin16, InstrItinClass itin32, 3662 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3663 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, 3664 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3665 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, 3666 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3667} 3668 3669multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, 3670 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3671 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, 3672 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; 3673 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, 3674 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3675} 3676 3677// ....then also with element size of 8 bits: 3678multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3679 InstrItinClass itin16, InstrItinClass itin32, 3680 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3681 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { 3682 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, 3683 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; 3684} 3685 3686// ....with explicit extend (VABAL). 
3687multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3688 InstrItinClass itin, string OpcodeStr, string Dt, 3689 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { 3690 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, 3691 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, 3692 IntOp, ExtOp, OpNode>; 3693 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, 3694 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, 3695 IntOp, ExtOp, OpNode>; 3696 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, 3697 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, 3698 IntOp, ExtOp, OpNode>; 3699} 3700 3701 3702// Neon Pairwise long 2-register intrinsics, 3703// element sizes of 8, 16 and 32 bits: 3704multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3705 bits<5> op11_7, bit op4, 3706 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3707 // 64-bit vector types. 3708 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3709 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3710 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3711 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3712 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3713 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3714 3715 // 128-bit vector types. 
3716 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3717 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3718 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3719 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3720 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3721 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3722} 3723 3724 3725// Neon Pairwise long 2-register accumulate intrinsics, 3726// element sizes of 8, 16 and 32 bits: 3727multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3728 bits<5> op11_7, bit op4, 3729 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3730 // 64-bit vector types. 3731 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3732 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3733 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3734 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3735 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3736 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3737 3738 // 128-bit vector types. 3739 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3740 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3741 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3742 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3743 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3744 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3745} 3746 3747 3748// Neon 2-register vector shift by immediate, 3749// with f of either N2RegVShLFrm or N2RegVShRFrm 3750// element sizes of 8, 16, 32 and 64 bits: 3751multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3752 InstrItinClass itin, string OpcodeStr, string Dt, 3753 SDNode OpNode> { 3754 // 64-bit vector types. 
3755 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3756 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3757 let Inst{21-19} = 0b001; // imm6 = 001xxx 3758 } 3759 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3760 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3761 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3762 } 3763 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3764 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3765 let Inst{21} = 0b1; // imm6 = 1xxxxx 3766 } 3767 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3768 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3769 // imm6 = xxxxxx 3770 3771 // 128-bit vector types. 3772 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3773 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3774 let Inst{21-19} = 0b001; // imm6 = 001xxx 3775 } 3776 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3777 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3778 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3779 } 3780 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3781 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3782 let Inst{21} = 0b1; // imm6 = 1xxxxx 3783 } 3784 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3785 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3786 // imm6 = xxxxxx 3787} 3788multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3789 InstrItinClass itin, string OpcodeStr, string Dt, 3790 string baseOpc, SDNode OpNode> { 3791 // 64-bit vector types. 
3792 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3793 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3794 let Inst{21-19} = 0b001; // imm6 = 001xxx 3795 } 3796 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3797 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3798 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3799 } 3800 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3801 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3802 let Inst{21} = 0b1; // imm6 = 1xxxxx 3803 } 3804 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3805 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3806 // imm6 = xxxxxx 3807 3808 // 128-bit vector types. 3809 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3810 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3811 let Inst{21-19} = 0b001; // imm6 = 001xxx 3812 } 3813 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3814 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3815 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3816 } 3817 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3818 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3819 let Inst{21} = 0b1; // imm6 = 1xxxxx 3820 } 3821 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3822 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3823 // imm6 = xxxxxx 3824} 3825 3826// Neon Shift-Accumulate vector operations, 3827// element sizes of 8, 16, 32 and 64 bits: 3828multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3829 string OpcodeStr, string Dt, SDNode ShOp> { 3830 // 64-bit vector types. 
3831 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 3832 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { 3833 let Inst{21-19} = 0b001; // imm6 = 001xxx 3834 } 3835 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 3836 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { 3837 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3838 } 3839 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 3840 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { 3841 let Inst{21} = 0b1; // imm6 = 1xxxxx 3842 } 3843 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 3844 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; 3845 // imm6 = xxxxxx 3846 3847 // 128-bit vector types. 3848 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, 3849 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { 3850 let Inst{21-19} = 0b001; // imm6 = 001xxx 3851 } 3852 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, 3853 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { 3854 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3855 } 3856 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, 3857 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { 3858 let Inst{21} = 0b1; // imm6 = 1xxxxx 3859 } 3860 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, 3861 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; 3862 // imm6 = xxxxxx 3863} 3864 3865// Neon Shift-Insert vector operations, 3866// with f of either N2RegVShLFrm or N2RegVShRFrm 3867// element sizes of 8, 16, 32 and 64 bits: 3868multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3869 string OpcodeStr> { 3870 // 64-bit vector types. 
3871 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 3872 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> { 3873 let Inst{21-19} = 0b001; // imm6 = 001xxx 3874 } 3875 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 3876 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> { 3877 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3878 } 3879 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, 3880 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> { 3881 let Inst{21} = 0b1; // imm6 = 1xxxxx 3882 } 3883 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, 3884 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>; 3885 // imm6 = xxxxxx 3886 3887 // 128-bit vector types. 3888 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 3889 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> { 3890 let Inst{21-19} = 0b001; // imm6 = 001xxx 3891 } 3892 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 3893 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> { 3894 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3895 } 3896 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, 3897 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> { 3898 let Inst{21} = 0b1; // imm6 = 1xxxxx 3899 } 3900 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, 3901 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>; 3902 // imm6 = xxxxxx 3903} 3904multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3905 string OpcodeStr> { 3906 // 64-bit vector types. 
3907 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, 3908 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> { 3909 let Inst{21-19} = 0b001; // imm6 = 001xxx 3910 } 3911 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, 3912 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> { 3913 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3914 } 3915 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, 3916 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> { 3917 let Inst{21} = 0b1; // imm6 = 1xxxxx 3918 } 3919 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, 3920 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>; 3921 // imm6 = xxxxxx 3922 3923 // 128-bit vector types. 3924 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, 3925 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> { 3926 let Inst{21-19} = 0b001; // imm6 = 001xxx 3927 } 3928 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, 3929 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> { 3930 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3931 } 3932 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, 3933 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> { 3934 let Inst{21} = 0b1; // imm6 = 1xxxxx 3935 } 3936 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, 3937 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>; 3938 // imm6 = xxxxxx 3939} 3940 3941// Neon Shift Long operations, 3942// element sizes of 8, 16, 32 bits: 3943multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3944 bit op4, string OpcodeStr, string Dt, SDNode OpNode> { 3945 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 3946 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { 3947 let Inst{21-19} = 0b001; // imm6 = 001xxx 3948 } 3949 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 3950 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { 3951 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3952 } 3953 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, 
3954 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { 3955 let Inst{21} = 0b1; // imm6 = 1xxxxx 3956 } 3957} 3958 3959// Neon Shift Narrow operations, 3960// element sizes of 16, 32, 64 bits: 3961multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 3962 bit op4, InstrItinClass itin, string OpcodeStr, string Dt, 3963 SDNode OpNode> { 3964 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3965 OpcodeStr, !strconcat(Dt, "16"), 3966 v8i8, v8i16, shr_imm8, OpNode> { 3967 let Inst{21-19} = 0b001; // imm6 = 001xxx 3968 } 3969 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3970 OpcodeStr, !strconcat(Dt, "32"), 3971 v4i16, v4i32, shr_imm16, OpNode> { 3972 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3973 } 3974 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, 3975 OpcodeStr, !strconcat(Dt, "64"), 3976 v2i32, v2i64, shr_imm32, OpNode> { 3977 let Inst{21} = 0b1; // imm6 = 1xxxxx 3978 } 3979} 3980 3981//===----------------------------------------------------------------------===// 3982// Instruction Definitions. 3983//===----------------------------------------------------------------------===// 3984 3985// Vector Add Operations. 
// VADD : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// Select the plain (add, logical-shift-right, truncate) DAG to VADDHN; the
// VADDHN instructions themselves were declared with null_frag above.
def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Multiply by a lane duplicated across a Q register: use the scalar (by-lane)
// instruction on the D subregister that actually holds the lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


// Multiply by a scalar duplicated from an S register: insert the scalar into
// lane 0 of an undefined D register and use the lane-0 by-scalar multiply.
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;


// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
// vqdmulh by a duplicated Q-register lane -> by-lane form (as for VMUL above).
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                        (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                        (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                                         (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                                         (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", NEONvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", NEONvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // The p64 form is ARMv8-only and requires the Crypto extension.
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// The f32 forms are guarded by UseFPVMLx and DontUseFusedMAC; when fused MAC
// is in use, the VFMA/VFMS definitions below are selected instead.
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Accumulate with a lane duplicated across a Q register: use the by-lane form
// on the D subregister that holds the lane.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Declared with null_frag; selection is handled by the explicit
// vqadds-of-vqdmull patterns that follow.
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                    (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// By-lane VMLS patterns, mirroring the VMLA patterns above.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                                     (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                                  (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
// NOTE(review): 0b111 is one bit narrower than the parallel VQDMLALsl's
// 0b0011 above -- presumably it zero-extends into the opcode field; confirm
// against N3VLInt3SL_HS's parameter width.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;

def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                    (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// VFMA : Fused Vector Multiply Accumulate (floating-point); only selected
// when fused MAC is in use (UseFusedMAC) and VFP4 is available.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
// fma with a negated multiplicand maps to VFMS.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;

// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// Select the plain (sub, logical-shift-right, truncate) DAG to VSUBHN; the
// VSUBHN instructions themselves were declared with null_frag above.
def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

// Compare-against-zero form ("$Vd, $Vm, #0").
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// VACLT/VACLE assembly aliases: encoded as VACGT/VACGE with the two source
// operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

// Two-operand forms of the aliases above ($Vd doubles as a source).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

// Vector Bitwise Operations.
// Fragments matching "xor with all-ones" (bitwise NOT) for D and Q operands.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR with a modified-immediate operand. The '?' cmode bits are wired from
// the immediate encoding in each body, and $src is tied to $Vd.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;
}

// VBIC with a modified-immediate operand (same operand wiring as VORRi*).
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                           (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                           (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// The i32 forms take the wider nImmVMOVI32 operand, so all four cmode bits
// come from the immediate.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
// Result takes bits from $Vn where the tied mask operand ($src1, the
// original $Vd) has a 1 bit, and from $Vm where it has a 0 bit — see the
// (or (and Vn, Vd), (and Vm, (vnot Vd))) patterns below.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// The vbsl intrinsic is lowered to VBSLd for every 64-bit element type;
// the operation is bitwise, so the lane layout is irrelevant.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Recognize an explicitly-expanded bitwise select and fold it to VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// No selection patterns ([]) — assembler/disassembler only; see the note
// after VBITq below.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// VBIT definitions (header comment immediately above). Like VBIF, these
// carry no selection patterns and exist for the assembler/disassembler.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Both the s and u flavors widen the vabd result with zext: the absolute
// difference is non-negative, so zero-extension is correct for both.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM
// ARMv8-only (Requires<[HasV8, HasNEON]>); needs the v8 Thumb2 post-encoder
// and a dedicated decoder namespace.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v2f32, v2f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v4f32, v4f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}

// Vector Pairwise Operations.
// Pairwise ops only have D-register (64-bit) forms; the last argument 0
// marks them as non-commutable.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4884 4885// VRECPE : Vector Reciprocal Estimate 4886def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4887 IIC_VUNAD, "vrecpe", "u32", 4888 v2i32, v2i32, int_arm_neon_vrecpe>; 4889def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4890 IIC_VUNAQ, "vrecpe", "u32", 4891 v4i32, v4i32, int_arm_neon_vrecpe>; 4892def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4893 IIC_VUNAD, "vrecpe", "f32", 4894 v2f32, v2f32, int_arm_neon_vrecpe>; 4895def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4896 IIC_VUNAQ, "vrecpe", "f32", 4897 v4f32, v4f32, int_arm_neon_vrecpe>; 4898 4899// VRECPS : Vector Reciprocal Step 4900def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4901 IIC_VRECSD, "vrecps", "f32", 4902 v2f32, v2f32, int_arm_neon_vrecps, 1>; 4903def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4904 IIC_VRECSQ, "vrecps", "f32", 4905 v4f32, v4f32, int_arm_neon_vrecps, 1>; 4906 4907// VRSQRTE : Vector Reciprocal Square Root Estimate 4908def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4909 IIC_VUNAD, "vrsqrte", "u32", 4910 v2i32, v2i32, int_arm_neon_vrsqrte>; 4911def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4912 IIC_VUNAQ, "vrsqrte", "u32", 4913 v4i32, v4i32, int_arm_neon_vrsqrte>; 4914def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4915 IIC_VUNAD, "vrsqrte", "f32", 4916 v2f32, v2f32, int_arm_neon_vrsqrte>; 4917def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4918 IIC_VUNAQ, "vrsqrte", "f32", 4919 v4f32, v4f32, int_arm_neon_vrsqrte>; 4920 4921// VRSQRTS : Vector Reciprocal Square Root Step 4922def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4923 IIC_VRECSD, "vrsqrts", "f32", 4924 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 4925def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4926 IIC_VRECSQ, "vrsqrts", "f32", 4927 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 4928 4929// Vector Shifts. 
// VSHL : Vector Shift
// Register-shift forms: the shift amount comes from a second vector
// register (N3RegVShFrm).
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// These encode bits 21-16 directly (the shift amount equals the source
// element width) and need a custom decoder to distinguish them.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.
5045 5046// VABS : Vector Absolute Value 5047defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 5048 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", 5049 int_arm_neon_vabs>; 5050def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5051 "vabs", "f32", 5052 v2f32, v2f32, fabs>; 5053def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5054 "vabs", "f32", 5055 v4f32, v4f32, fabs>; 5056 5057def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), 5058 (v2i32 (bitconvert (v8i8 (add DPR:$src, 5059 (NEONvshrs DPR:$src, (i32 7))))))), 5060 (VABSv8i8 DPR:$src)>; 5061def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), 5062 (v2i32 (bitconvert (v4i16 (add DPR:$src, 5063 (NEONvshrs DPR:$src, (i32 15))))))), 5064 (VABSv4i16 DPR:$src)>; 5065def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), 5066 (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), 5067 (VABSv2i32 DPR:$src)>; 5068def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), 5069 (v4i32 (bitconvert (v16i8 (add QPR:$src, 5070 (NEONvshrs QPR:$src, (i32 7))))))), 5071 (VABSv16i8 QPR:$src)>; 5072def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), 5073 (v4i32 (bitconvert (v8i16 (add QPR:$src, 5074 (NEONvshrs QPR:$src, (i32 15))))))), 5075 (VABSv8i16 QPR:$src)>; 5076def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), 5077 (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), 5078 (VABSv4i32 QPR:$src)>; 5079 5080def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; 5081def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; 5082 5083// VQABS : Vector Saturating Absolute Value 5084defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 5085 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 5086 int_arm_neon_vqabs>; 5087 5088// Vector Negate. 
// Negation pattern fragments: neg(x) is matched as (0 - x), with the
// all-zeros constant bitconverted to the operand's type.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// NOTE(review): these Pats restate the selection patterns attached to the
// VNEG defs above.
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits
// Population count; byte-element (".8") forms only.
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are read and written (tied in both directions); no
// selection pattern ([]).
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Register-to-register vmov is just VORR with both sources the same.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Immediate moves produce constants and can be rematerialized rather than
// spilled.  As with the immediate VORR/VBIC/VMVN forms, the variable cmode
// bits are copied out of the encoded immediate (SIMM).
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)
// The lane index is split across the opcode bits via the `let Inst{...}`
// assignments; 8-bit lanes need three index bits, 16-bit lanes two.

def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
// Only selected on subtargets where moving a lane to a core register is
// fast (HasFastVGETLNi32); see the Slow* patterns below for the others.
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                          Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register extracts: pick the D subregister holding the lane, then use
// the D-form instruction on the remapped lane index.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// Slow-VGETLNi32 subtargets extract through an S subregister copy instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register inserts: set the lane inside the containing D subregister,
// then put that D register back into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: place the scalar in lane 0 of an undefined vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
// The 32-bit D form is restricted to subtargets with a fast VDUP.32.
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
5423def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, 5424 Requires<[HasNEON,HasSlowVDUP32]>; 5425def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, 5426 Requires<[HasNEON,HasSlowVDUP32]>; 5427 5428// VDUP : Vector Duplicate Lane (from scalar to all elements) 5429 5430class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, 5431 ValueType Ty, Operand IdxTy> 5432 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5433 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", 5434 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; 5435 5436class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, 5437 ValueType ResTy, ValueType OpTy, Operand IdxTy> 5438 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), 5439 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", 5440 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), 5441 VectorIndex32:$lane)))]>; 5442 5443// Inst{19-16} is partially specified depending on the element size. 
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: dup from the D sub-register that
// actually contains the requested lane.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for duplicating an FP scalar; expanded after register allocation.
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                      "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                           "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                           "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext does not care about the top bits, so the zero-extending form works.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.
// VCVT : Vector Convert Between Floating-Point and Integers
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v2f32, v2i32, uint_to_fp>;

def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                    v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                    v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                    v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                    v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M} : directed-rounding conversions (ARMv8 only).
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// Accept the "#0" fixed-point spellings as aliases of the plain conversions.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                      IIC_VUNAQ, "vcvt", "f16.f32",
                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
              Requires<[HasNEON, HasFP16]>;
def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                      IIC_VUNAQ, "vcvt", "f32.f16",
                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
              Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11}   = 0b0;
  let Inst{10-8} = index{2-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// The index field narrows with the element size; unused low bits are zero.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// Multi-register tables need the extra source reg-alloc constraint.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  // Also accept the redundant ".f32.f32" spelling.
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto" in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD   : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE   : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC  : AES<"mc", 1, 0, int_arm_neon_aesmc>;

def SHA1H     : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
def SHA1SU1   : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C     : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
def SHA1M     : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
def SHA1P     : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
def SHA1SU0   : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H   : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2  : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
5918//===----------------------------------------------------------------------===// 5919// NEON instructions for single-precision FP math 5920//===----------------------------------------------------------------------===// 5921 5922class N2VSPat<SDNode OpNode, NeonI Inst> 5923 : NEONFPPat<(f32 (OpNode SPR:$a)), 5924 (EXTRACT_SUBREG 5925 (v2f32 (COPY_TO_REGCLASS (Inst 5926 (INSERT_SUBREG 5927 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5928 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; 5929 5930class N3VSPat<SDNode OpNode, NeonI Inst> 5931 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), 5932 (EXTRACT_SUBREG 5933 (v2f32 (COPY_TO_REGCLASS (Inst 5934 (INSERT_SUBREG 5935 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5936 SPR:$a, ssub_0), 5937 (INSERT_SUBREG 5938 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5939 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 5940 5941class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> 5942 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), 5943 (EXTRACT_SUBREG 5944 (v2f32 (COPY_TO_REGCLASS (Inst 5945 (INSERT_SUBREG 5946 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5947 SPR:$acc, ssub_0), 5948 (INSERT_SUBREG 5949 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5950 SPR:$a, ssub_0), 5951 (INSERT_SUBREG 5952 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), 5953 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; 5954 5955def : N3VSPat<fadd, VADDfd>; 5956def : N3VSPat<fsub, VSUBfd>; 5957def : N3VSPat<fmul, VMULfd>; 5958def : N3VSMulOpPat<fmul, fadd, VMLAfd>, 5959 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 5960def : N3VSMulOpPat<fmul, fsub, VMLSfd>, 5961 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; 5962def : N3VSMulOpPat<fmul, fadd, VFMAfd>, 5963 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 5964def : N3VSMulOpPat<fmul, fsub, VFMSfd>, 5965 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; 5966def : N2VSPat<fabs, 
VABSfd>; 5967def : N2VSPat<fneg, VNEGfd>; 5968def : N3VSPat<NEONfmax, VMAXfd>; 5969def : N3VSPat<NEONfmin, VMINfd>; 5970def : N2VSPat<arm_ftosi, VCVTf2sd>; 5971def : N2VSPat<arm_ftoui, VCVTf2ud>; 5972def : N2VSPat<arm_sitof, VCVTs2fd>; 5973def : N2VSPat<arm_uitof, VCVTu2fd>; 5974 5975// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 5976def : Pat<(f32 (bitconvert GPR:$a)), 5977 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 5978 Requires<[HasNEON, DontUseVMOVSR]>; 5979 5980//===----------------------------------------------------------------------===// 5981// Non-Instruction Patterns 5982//===----------------------------------------------------------------------===// 5983 5984// bit_convert 5985def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; 5986def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; 5987def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; 5988def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; 5989def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; 5990def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; 5991def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; 5992def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; 5993def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; 5994def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; 5995def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; 5996def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; 5997def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; 5998def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; 5999def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; 6000def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; 6001def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; 6002def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 
DPR:$src)>; 6003def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; 6004def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; 6005def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; 6006def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; 6007def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; 6008def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; 6009def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; 6010def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; 6011def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; 6012def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; 6013def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; 6014def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; 6015 6016def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; 6017def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; 6018def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; 6019def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; 6020def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; 6021def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; 6022def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; 6023def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; 6024def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; 6025def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; 6026def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; 6027def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; 6028def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; 6029def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; 6030def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; 6031def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 
QPR:$src)>; 6032def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; 6033def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; 6034def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; 6035def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; 6036def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; 6037def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; 6038def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; 6039def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; 6040def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; 6041def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 6042def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; 6043def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; 6044def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; 6045def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; 6046 6047// Fold extracting an element out of a v2i32 into a vfp register. 6048def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), 6049 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 6050 6051// Vector lengthening move with load, matching extending loads. 6052 6053// extload, zextload and sextload for a standard lengthening load. 
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
// Emits one pattern per extension kind (_Any/_Z/_S): load a full D register
// with VLD1, then widen it with an unsigned (VMOVLu) or signed (VMOVLs) move.
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
//   Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
// Loads a single 32-bit lane, widens the whole register, then extracts the
// low D subregister (dsub_0) holding the lanes that were actually loaded.
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//   Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//       (EXTRACT_SUBREG (VMOVLuv4i32
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)),
//                                                 (i32 0))),
//                         dsub_0)),
//       dsub_0)>;
// Two chained widening moves: Insn1 widens the loaded lane once, Insn2 widens
// the low half again, quadrupling the element width overall.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//   Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//       (EXTRACT_SUBREG (VMOVLuv4i32
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)),
//                       dsub_0)>;
// Like Lengthen_Double, but loads only a 16-bit lane and extracts the low D
// subregister after the second widening move, yielding a 64-bit result.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

// Instantiations for full-width and half-width single lengthening loads.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// No multiclass covers three chained widenings, so spell out the three
// extension kinds (any/zero/sign) directly.
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
         (VLD1LNd16 addrmode6:$addr,
                    (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy FPA-style mnemonics for moving a GPR into the high/low half of a
// D register; lane index 1 is the high half, 0 the low half.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Bitwise ops are type-agnostic; these aliases let the assembler accept any
// data-type suffix (or none) for both D- and Q-register forms.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Plain, writeback-fixed (`$addr!`) and writeback-register (`$addr, $Rm`)
// forms for each of the .8/.16/.32 element sizes.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Store counterparts of the VLD1 single-lane pseudos above; same three
// addressing forms per element size.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VLD2/VST2 operate on register pairs; .16/.32 element sizes additionally
// have Q-register-spaced (q) list variants.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// All-lanes (DUP) loads replicate each loaded element across a register; both
// consecutive (d) and every-other (q) three-register list variants exist.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VLD3 single-lane pseudos, then VLD3 multiple-structure pseudos.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// VST3 single-lane pseudos, then VST3 multiple-structure pseudos; store
// counterparts of the VLD3 pseudos above.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                     rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
6682def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6683 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6684def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6685 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6686def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6687 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6688def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6689 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6690def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6691 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6692def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6693 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6694 6695def VLD4DUPdWB_fixed_Asm_8 : 6696 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6697 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6698def VLD4DUPdWB_fixed_Asm_16 : 6699 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6700 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6701def VLD4DUPdWB_fixed_Asm_32 : 6702 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6703 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; 6704def VLD4DUPqWB_fixed_Asm_8 : 6705 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6706 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6707def VLD4DUPqWB_fixed_Asm_16 : 6708 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6709 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6710def VLD4DUPqWB_fixed_Asm_32 : 6711 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6712 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; 6713def VLD4DUPdWB_register_Asm_8 : 6714 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, 
$addr, $Rm", 6715 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6716 rGPR:$Rm, pred:$p)>; 6717def VLD4DUPdWB_register_Asm_16 : 6718 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6719 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6720 rGPR:$Rm, pred:$p)>; 6721def VLD4DUPdWB_register_Asm_32 : 6722 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6723 (ins VecListFourDAllLanes:$list, addrmode6:$addr, 6724 rGPR:$Rm, pred:$p)>; 6725def VLD4DUPqWB_register_Asm_8 : 6726 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6727 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6728 rGPR:$Rm, pred:$p)>; 6729def VLD4DUPqWB_register_Asm_16 : 6730 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6731 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6732 rGPR:$Rm, pred:$p)>; 6733def VLD4DUPqWB_register_Asm_32 : 6734 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6735 (ins VecListFourQAllLanes:$list, addrmode6:$addr, 6736 rGPR:$Rm, pred:$p)>; 6737 6738 6739// VLD4 single-lane pseudo-instructions. These need special handling for 6740// the lane index that an InstAlias can't handle, so we use these instead. 
6741def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6742 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6743def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6744 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6745def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6746 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6747def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6748 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6749def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6750 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6751 6752def VLD4LNdWB_fixed_Asm_8 : 6753 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6754 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6755def VLD4LNdWB_fixed_Asm_16 : 6756 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6757 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6758def VLD4LNdWB_fixed_Asm_32 : 6759 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6760 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6761def VLD4LNqWB_fixed_Asm_16 : 6762 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6763 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6764def VLD4LNqWB_fixed_Asm_32 : 6765 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6766 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6767def VLD4LNdWB_register_Asm_8 : 6768 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6769 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, 6770 rGPR:$Rm, pred:$p)>; 6771def VLD4LNdWB_register_Asm_16 : 6772 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6773 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, 6774 rGPR:$Rm, 
pred:$p)>; 6775def VLD4LNdWB_register_Asm_32 : 6776 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6777 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, 6778 rGPR:$Rm, pred:$p)>; 6779def VLD4LNqWB_register_Asm_16 : 6780 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6781 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, 6782 rGPR:$Rm, pred:$p)>; 6783def VLD4LNqWB_register_Asm_32 : 6784 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6785 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, 6786 rGPR:$Rm, pred:$p)>; 6787 6788 6789 6790// VLD4 multiple structure pseudo-instructions. These need special handling for 6791// the vector operands that the normal instructions don't yet model. 6792// FIXME: Remove these when the register classes and instructions are updated. 6793def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6794 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6795def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6796 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6797def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6798 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6799def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", 6800 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6801def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", 6802 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6803def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", 6804 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6805 6806def VLD4dWB_fixed_Asm_8 : 6807 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6808 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6809def VLD4dWB_fixed_Asm_16 : 6810 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6811 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 
6812def VLD4dWB_fixed_Asm_32 : 6813 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6814 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6815def VLD4qWB_fixed_Asm_8 : 6816 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", 6817 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6818def VLD4qWB_fixed_Asm_16 : 6819 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", 6820 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6821def VLD4qWB_fixed_Asm_32 : 6822 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", 6823 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6824def VLD4dWB_register_Asm_8 : 6825 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6826 (ins VecListFourD:$list, addrmode6:$addr, 6827 rGPR:$Rm, pred:$p)>; 6828def VLD4dWB_register_Asm_16 : 6829 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6830 (ins VecListFourD:$list, addrmode6:$addr, 6831 rGPR:$Rm, pred:$p)>; 6832def VLD4dWB_register_Asm_32 : 6833 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6834 (ins VecListFourD:$list, addrmode6:$addr, 6835 rGPR:$Rm, pred:$p)>; 6836def VLD4qWB_register_Asm_8 : 6837 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", 6838 (ins VecListFourQ:$list, addrmode6:$addr, 6839 rGPR:$Rm, pred:$p)>; 6840def VLD4qWB_register_Asm_16 : 6841 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", 6842 (ins VecListFourQ:$list, addrmode6:$addr, 6843 rGPR:$Rm, pred:$p)>; 6844def VLD4qWB_register_Asm_32 : 6845 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", 6846 (ins VecListFourQ:$list, addrmode6:$addr, 6847 rGPR:$Rm, pred:$p)>; 6848 6849// VST4 single-lane pseudo-instructions. These need special handling for 6850// the lane index that an InstAlias can't handle, so we use these instead. 
6851def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6852 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6853def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6854 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6855def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6856 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6857def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6858 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6859def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6860 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6861 6862def VST4LNdWB_fixed_Asm_8 : 6863 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6864 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6865def VST4LNdWB_fixed_Asm_16 : 6866 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6867 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6868def VST4LNdWB_fixed_Asm_32 : 6869 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6870 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6871def VST4LNqWB_fixed_Asm_16 : 6872 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6873 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6874def VST4LNqWB_fixed_Asm_32 : 6875 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6876 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6877def VST4LNdWB_register_Asm_8 : 6878 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6879 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, 6880 rGPR:$Rm, pred:$p)>; 6881def VST4LNdWB_register_Asm_16 : 6882 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6883 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, 6884 rGPR:$Rm, 
pred:$p)>; 6885def VST4LNdWB_register_Asm_32 : 6886 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6887 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, 6888 rGPR:$Rm, pred:$p)>; 6889def VST4LNqWB_register_Asm_16 : 6890 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6891 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, 6892 rGPR:$Rm, pred:$p)>; 6893def VST4LNqWB_register_Asm_32 : 6894 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6895 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, 6896 rGPR:$Rm, pred:$p)>; 6897 6898 6899// VST4 multiple structure pseudo-instructions. These need special handling for 6900// the vector operands that the normal instructions don't yet model. 6901// FIXME: Remove these when the register classes and instructions are updated. 6902def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6903 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6904def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6905 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6906def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6907 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6908def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", 6909 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6910def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", 6911 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6912def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", 6913 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6914 6915def VST4dWB_fixed_Asm_8 : 6916 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6917 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6918def VST4dWB_fixed_Asm_16 : 6919 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6920 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6921def 
VST4dWB_fixed_Asm_32 : 6922 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6923 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; 6924def VST4qWB_fixed_Asm_8 : 6925 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", 6926 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6927def VST4qWB_fixed_Asm_16 : 6928 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", 6929 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6930def VST4qWB_fixed_Asm_32 : 6931 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", 6932 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; 6933def VST4dWB_register_Asm_8 : 6934 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6935 (ins VecListFourD:$list, addrmode6:$addr, 6936 rGPR:$Rm, pred:$p)>; 6937def VST4dWB_register_Asm_16 : 6938 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6939 (ins VecListFourD:$list, addrmode6:$addr, 6940 rGPR:$Rm, pred:$p)>; 6941def VST4dWB_register_Asm_32 : 6942 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6943 (ins VecListFourD:$list, addrmode6:$addr, 6944 rGPR:$Rm, pred:$p)>; 6945def VST4qWB_register_Asm_8 : 6946 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", 6947 (ins VecListFourQ:$list, addrmode6:$addr, 6948 rGPR:$Rm, pred:$p)>; 6949def VST4qWB_register_Asm_16 : 6950 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", 6951 (ins VecListFourQ:$list, addrmode6:$addr, 6952 rGPR:$Rm, pred:$p)>; 6953def VST4qWB_register_Asm_32 : 6954 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", 6955 (ins VecListFourQ:$list, addrmode6:$addr, 6956 rGPR:$Rm, pred:$p)>; 6957 6958// VMOV/VMVN takes an optional datatype suffix 6959defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 6960 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 6961defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", 6962 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 6963 6964defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, 
$Vm", 6965 (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; 6966defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", 6967 (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; 6968 6969// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 6970// D-register versions. 6971def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", 6972 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6973def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", 6974 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6975def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", 6976 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6977def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", 6978 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6979def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", 6980 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6981def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", 6982 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6983def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", 6984 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 6985// Q-register versions. 6986def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", 6987 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6988def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", 6989 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6990def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", 6991 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6992def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", 6993 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6994def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", 6995 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6996def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", 6997 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 6998def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", 6999 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7000 7001// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. 7002// D-register versions. 
7003def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", 7004 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7005def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", 7006 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7007def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", 7008 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7009def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", 7010 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7011def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", 7012 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7013def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", 7014 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7015def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", 7016 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; 7017// Q-register versions. 7018def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", 7019 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7020def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", 7021 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7022def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", 7023 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7024def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", 7025 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7026def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", 7027 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7028def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", 7029 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7030def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", 7031 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; 7032 7033// VSWP allows, but does not require, a type suffix. 7034defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 7035 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; 7036defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", 7037 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; 7038 7039// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 
7040defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 7041 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7042defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 7043 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7044defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 7045 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 7046defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", 7047 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7048defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", 7049 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7050defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", 7051 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 7052 7053// "vmov Rd, #-imm" can be handled via "vmvn". 7054def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 7055 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7056def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", 7057 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7058def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 7059 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7060def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", 7061 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; 7062 7063// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, 7064// these should restrict to just the Q register variants, but the register 7065// classes are enough to match correctly regardless, so we keep it simple 7066// and just use MnemonicAlias. 7067def : NEONMnemonicAlias<"vbicq", "vbic">; 7068def : NEONMnemonicAlias<"vandq", "vand">; 7069def : NEONMnemonicAlias<"veorq", "veor">; 7070def : NEONMnemonicAlias<"vorrq", "vorr">; 7071 7072def : NEONMnemonicAlias<"vmovq", "vmov">; 7073def : NEONMnemonicAlias<"vmvnq", "vmvn">; 7074// Explicit versions for floating point so that the FPImm variants get 7075// handled early. The parser gets confused otherwise. 
7076def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; 7077def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; 7078 7079def : NEONMnemonicAlias<"vaddq", "vadd">; 7080def : NEONMnemonicAlias<"vsubq", "vsub">; 7081 7082def : NEONMnemonicAlias<"vminq", "vmin">; 7083def : NEONMnemonicAlias<"vmaxq", "vmax">; 7084 7085def : NEONMnemonicAlias<"vmulq", "vmul">; 7086 7087def : NEONMnemonicAlias<"vabsq", "vabs">; 7088 7089def : NEONMnemonicAlias<"vshlq", "vshl">; 7090def : NEONMnemonicAlias<"vshrq", "vshr">; 7091 7092def : NEONMnemonicAlias<"vcvtq", "vcvt">; 7093 7094def : NEONMnemonicAlias<"vcleq", "vcle">; 7095def : NEONMnemonicAlias<"vceqq", "vceq">; 7096 7097def : NEONMnemonicAlias<"vzipq", "vzip">; 7098def : NEONMnemonicAlias<"vswpq", "vswp">; 7099 7100def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; 7101def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; 7102 7103 7104// Alias for loading floating point immediates that aren't representable 7105// using the vmov.f32 encoding but the bitpattern is representable using 7106// the .i32 encoding. 7107def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7108 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7109def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", 7110 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; 7111