// AArch64InstrNEON.td revision 263508
//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the AArch64 NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Bitwise select: result and all three operands share one vector type.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisSameAs<0, 3>]>>;

// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;

// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                       [SDTCisVec<0>, SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// Saturating shifts: result and first operand share a type, the shift
// amount is an i32 immediate.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;

// Two-vector permutes (UZP/ZIP/TRN): all vectors share one type.
def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>]>;
def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;

// Single-vector shuffles (REV*).
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
                           [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                            SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;

def SDT_assertext : SDTypeProfile<1, 1,
                                  [SDTCisInt<0>, SDTCisInt<1>,
                                   SDTCisSameAs<1, 0>]>;
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Three-register, same-arrangement instructions restricted to the byte
// arrangements (8B/16B), e.g. the polynomial multiply.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, size, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                           [(set (v8i8 VPR64:$Rd),
                              (v8i8 (opnode8B (v8i8 VPR64:$Rn),
                                              (v8i8 VPR64:$Rm))))],
                           NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                            asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                            [(set (v16i8 VPR128:$Rd),
                               (v16i8 (opnode16B (v16i8 VPR128:$Rn),
                                                 (v16i8 VPR128:$Rm))))],
                            NoItinerary>;
  }
}

// Halfword and word arrangements (4H/8H/2S/4S).
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
                           [(set (v4i16 VPR64:$Rd),
                              (v4i16 (opnode (v4i16 VPR64:$Rn),
                                             (v4i16 VPR64:$Rm))))],
                           NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
                           [(set (v8i16 VPR128:$Rd),
                              (v8i16 (opnode (v8i16 VPR128:$Rn),
                                             (v8i16 VPR128:$Rm))))],
                           NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
                           [(set (v2i32 VPR64:$Rd),
                              (v2i32 (opnode (v2i32 VPR64:$Rn),
                                             (v2i32 VPR64:$Rm))))],
                           NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                           [(set (v4i32 VPR128:$Rd),
                              (v4i32 (opnode (v4i32 VPR128:$Rn),
                                             (v4i32 VPR128:$Rm))))],
                           NoItinerary>;
  }
}

// Byte, halfword and word arrangements (adds 8B/16B to the HS set).
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                           [(set (v8i8 VPR64:$Rd),
                              (v8i8 (opnode (v8i8 VPR64:$Rn),
                                            (v8i8 VPR64:$Rm))))],
                           NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                            asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                            [(set (v16i8 VPR128:$Rd),
                               (v16i8 (opnode (v16i8 VPR128:$Rn),
                                              (v16i8 VPR128:$Rm))))],
                            NoItinerary>;
  }
}

// All integer arrangements including doubleword (adds 2D to the BHS set).
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
                           [(set (v2i64 VPR128:$Rd),
                              (v2i64 (opnode (v2i64 VPR128:$Rn),
                                             (v2i64 VPR128:$Rm))))],
                           NoItinerary>;
  }
}

// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
                           [(set (ResTy2S VPR64:$Rd),
                              (ResTy2S (opnode2S (v2f32 VPR64:$Rn),
                                                 (v2f32 VPR64:$Rm))))],
                           NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                           [(set (ResTy4S VPR128:$Rd),
                              (ResTy4S (opnode4S (v4f32 VPR128:$Rn),
                                                 (v4f32 VPR128:$Rm))))],
                           NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
                           [(set (ResTy2D VPR128:$Rd),
                              (ResTy2D (opnode2D (v2f64 VPR128:$Rn),
                                                 (v2f64 VPR128:$Rm))))],
                           NoItinerary>;
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions
//===----------------------------------------------------------------------===//

// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)

defm ADDvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd",
                                     fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)

defm SUBvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub",
                                     fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)

defm MULvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul",
                                     fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)

defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;

// Vector Multiply-accumulate and Multiply-subtract (Integer)

// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints ($src is tied to $Rd so the accumulator is
// read-modify-written in place).
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterOperand VPRC, ValueType OpTy,
                                   bit q, bit u, bits<2> size, bits<5> opcode,
                                   SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
                 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
                 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane #
                   ", $Rm" # asmlane,
                 [(set (OpTy VPRC:$Rd),
                    (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn),
                                  (OpTy VPRC:$Rm))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;


def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;

// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;

// Separate fadd/fmul (or fsub/fmul) pairs are only contracted into fused
// multiply-add instructions when fused MAC is enabled.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64,  v2f32,
                                             0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}

// We're also allowed to match the fma instruction regardless of compile
// options.
// Explicit fma nodes always map onto FMLA/FMLS.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv",
                                     fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;

// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;

// The MOVI instruction takes two immediate operands. The first is the
// immediate encoding, while the second is the cmode. A cmode of 14, or
// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
// All-zero / all-one vector constants expressed through NEON_MOVIMM
// (cmode 14 selects plain MOVI; imm 255 fills every byte).
def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
def Neon_AllOne  : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;

// Bitwise NOT written as XOR with an all-ones vector.
def Neon_not8B  : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;

def Neon_orn8B  : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

def Neon_bic8B  : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;


// Vector Bitwise OR NOT - register

defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register

defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;

// Extend the 8B/16B bitwise instructions to the other integer vector types
// of the same register width (the operation is type-agnostic bit-wise).
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}

// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;

// Vector Bitwise Select
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b01, 0b00011, Neon_bsl>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>;

// Extend BSL to every vector type of the same register width, and match
// the expanded or/and/not form as well as the llvm.arm.neon.vbsl intrinsic.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                                      (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                                      (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                                      (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                                      (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
                                      (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                                      (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                                      (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                                      (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                                      (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                                      (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                                      (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}

// Additional patterns for bitwise instruction BSL
defm : Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// Never-matching fragment: BIT/BIF have no selectable DAG pattern and are
// reached only through assembly parsing.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True

def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b10, 0b00011,
                                              Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011,
                                              Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b11, 0b00011,
                                              Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011,
                                              Neon_NoBSLop>;

// Vector Absolute Difference and Accumulate (Signed, Unsigned)

def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B  : NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64, v8i8,
                                               0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H  : NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64, v4i16,
                                               0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H  : NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S  : NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64, v2i32,
                                               0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S  : NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B  : NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64, v8i8,
                                               0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H  : NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64, v4i16,
                                               0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H  : NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S  : NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64, v2i32,
                                               0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S  : NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111, Neon_saba>;


// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd",
                                      int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd",
                                      int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                     int_arm_neon_vabds, int_arm_neon_vabds,
                                     int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;

// Vector Comparisons

def Neon_cmeq  : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge  : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi  : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt  : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETGT)>;

// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;

// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
let isCommutable = 1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;

// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;


// Operand class for the literal "#0" used by the compare-to-zero forms.
def neon_uimm0_asmoperand : AsmOperandClass {
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}

// Integer compare against zero, for every integer arrangement.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> {
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.8b, $Rn.8b, $Imm",
                         [(set (v8i8 VPR64:$Rd),
                            (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn),
                                             (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                          asmop # "\t$Rd.16b, $Rn.16b, $Imm",
                          [(set (v16i8 VPR128:$Rd),
                             (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn),
                                               (i32 imm:$Imm), CC)))],
                          NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.4h, $Rn.4h, $Imm",
                         [(set (v4i16 VPR64:$Rd),
                            (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.8h, $Rn.8h, $Imm",
                         [(set (v8i16 VPR128:$Rd),
                            (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.2s, $Rn.2s, $Imm",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.4s, $Rn.4s, $Imm",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.2d, $Rn.2d, $Imm",
                         [(set (v2i64 VPR128:$Rd),
                            (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;
}

// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;

// Vector Comparisons (Floating Point)

// Vector Compare Mask Equal (Floating Point)
let isCommutable = 1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}

// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
767def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>; 768def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>; 769def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>; 770 771 772multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode, 773 string asmop, CondCode CC> 774{ 775 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, 776 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm), 777 asmop # "\t$Rd.2s, $Rn.2s, $FPImm", 778 [(set (v2i32 VPR64:$Rd), 779 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))], 780 NoItinerary>; 781 782 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, 783 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), 784 asmop # "\t$Rd.4s, $Rn.4s, $FPImm", 785 [(set (v4i32 VPR128:$Rd), 786 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], 787 NoItinerary>; 788 789 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, 790 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), 791 asmop # "\t$Rd.2d, $Rn.2d, $FPImm", 792 [(set (v2i64 VPR128:$Rd), 793 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], 794 NoItinerary>; 795} 796 797// Vector Compare Mask Equal to Zero (Floating Point) 798defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; 799 800// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) 801defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; 802 803// Vector Compare Mask Greater Than Zero (Floating Point) 804defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; 805 806// Vector Compare Mask Less Than or Equal To Zero (Floating Point) 807defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; 808 809// Vector Compare Mask Less Than Zero (Floating Point) 810defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; 811 812// Vector Absolute Comparisons (Floating Point) 813 814// Vector Absolute Compare Mask Greater Than Or Equal 
(Floating Point) 815defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", 816 int_arm_neon_vacged, int_arm_neon_vacgeq, 817 int_aarch64_neon_vacgeq, 818 v2i32, v4i32, v2i64, 0>; 819 820// Vector Absolute Compare Mask Greater Than (Floating Point) 821defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", 822 int_arm_neon_vacgtd, int_arm_neon_vacgtq, 823 int_aarch64_neon_vacgtq, 824 v2i32, v4i32, v2i64, 0>; 825 826// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) 827// FACLE is alias for FACGE with operands reversed. 828def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>; 829def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>; 830def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>; 831 832// Vector Absolute Compare Mask Less Than (Floating Point) 833// FACLT is alias for FACGT with operands reversed. 834def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>; 835def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>; 836def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>; 837 838// Vector halving add (Integer Signed, Unsigned) 839defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd", 840 int_arm_neon_vhadds, 1>; 841defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd", 842 int_arm_neon_vhaddu, 1>; 843 844// Vector halving sub (Integer Signed, Unsigned) 845defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub", 846 int_arm_neon_vhsubs, 0>; 847defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub", 848 int_arm_neon_vhsubu, 0>; 849 850// Vector rouding halving add (Integer Signed, Unsigned) 851defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd", 852 int_arm_neon_vrhadds, 1>; 853defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd", 854 int_arm_neon_vrhaddu, 1>; 855 856// Vector Saturating add (Integer Signed, Unsigned) 857defm SQADDvvv : 
NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd", 858 int_arm_neon_vqadds, 1>; 859defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd", 860 int_arm_neon_vqaddu, 1>; 861 862// Vector Saturating sub (Integer Signed, Unsigned) 863defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub", 864 int_arm_neon_vqsubs, 1>; 865defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub", 866 int_arm_neon_vqsubu, 1>; 867 868// Vector Shift Left (Signed and Unsigned Integer) 869defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl", 870 int_arm_neon_vshifts, 1>; 871defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl", 872 int_arm_neon_vshiftu, 1>; 873 874// Vector Saturating Shift Left (Signed and Unsigned Integer) 875defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl", 876 int_arm_neon_vqshifts, 1>; 877defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl", 878 int_arm_neon_vqshiftu, 1>; 879 880// Vector Rouding Shift Left (Signed and Unsigned Integer) 881defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl", 882 int_arm_neon_vrshifts, 1>; 883defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl", 884 int_arm_neon_vrshiftu, 1>; 885 886// Vector Saturating Rouding Shift Left (Signed and Unsigned Integer) 887defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl", 888 int_arm_neon_vqrshifts, 1>; 889defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl", 890 int_arm_neon_vqrshiftu, 1>; 891 892// Vector Maximum (Signed and Unsigned Integer) 893defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>; 894defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>; 895 896// Vector Minimum (Signed and Unsigned Integer) 897defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>; 898defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>; 899 900// Vector Maximum (Floating Point) 901defm FMAXvvv : 
NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", 902 int_arm_neon_vmaxs, int_arm_neon_vmaxs, 903 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>; 904 905// Vector Minimum (Floating Point) 906defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", 907 int_arm_neon_vmins, int_arm_neon_vmins, 908 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>; 909 910// Vector maxNum (Floating Point) - prefer a number over a quiet NaN) 911defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", 912 int_aarch64_neon_vmaxnm, 913 int_aarch64_neon_vmaxnm, 914 int_aarch64_neon_vmaxnm, 915 v2f32, v4f32, v2f64, 1>; 916 917// Vector minNum (Floating Point) - prefer a number over a quiet NaN) 918defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", 919 int_aarch64_neon_vminnm, 920 int_aarch64_neon_vminnm, 921 int_aarch64_neon_vminnm, 922 v2f32, v4f32, v2f64, 1>; 923 924// Vector Maximum Pairwise (Signed and Unsigned Integer) 925defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>; 926defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>; 927 928// Vector Minimum Pairwise (Signed and Unsigned Integer) 929defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>; 930defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>; 931 932// Vector Maximum Pairwise (Floating Point) 933defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", 934 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs, 935 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; 936 937// Vector Minimum Pairwise (Floating Point) 938defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", 939 int_arm_neon_vpmins, int_arm_neon_vpmins, 940 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; 941 942// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN) 943defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", 944 int_aarch64_neon_vpmaxnm, 945 
int_aarch64_neon_vpmaxnm, 946 int_aarch64_neon_vpmaxnm, 947 v2f32, v4f32, v2f64, 1>; 948 949// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN) 950defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", 951 int_aarch64_neon_vpminnm, 952 int_aarch64_neon_vpminnm, 953 int_aarch64_neon_vpminnm, 954 v2f32, v4f32, v2f64, 1>; 955 956// Vector Addition Pairwise (Integer) 957defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>; 958 959// Vector Addition Pairwise (Floating Point) 960defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", 961 int_arm_neon_vpadd, 962 int_arm_neon_vpadd, 963 int_arm_neon_vpadd, 964 v2f32, v4f32, v2f64, 1>; 965 966// Vector Saturating Doubling Multiply High 967defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", 968 int_arm_neon_vqdmulh, 1>; 969 970// Vector Saturating Rouding Doubling Multiply High 971defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", 972 int_arm_neon_vqrdmulh, 1>; 973 974// Vector Multiply Extended (Floating Point) 975defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 976 int_aarch64_neon_vmulx, 977 int_aarch64_neon_vmulx, 978 int_aarch64_neon_vmulx, 979 v2f32, v4f32, v2f64, 1>; 980 981// Vector Immediate Instructions 982 983multiclass neon_mov_imm_shift_asmoperands<string PREFIX> 984{ 985 def _asmoperand : AsmOperandClass 986 { 987 let Name = "NeonMovImmShift" # PREFIX; 988 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands"; 989 let PredicateMethod = "isNeonMovImmShift" # PREFIX; 990 } 991} 992 993// Definition of vector immediates shift operands 994 995// The selectable use-cases extract the shift operation 996// information from the OpCmode fields encoded in the immediate. 
997def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{ 998 uint64_t OpCmode = N->getZExtValue(); 999 unsigned ShiftImm; 1000 unsigned ShiftOnesIn; 1001 unsigned HasShift = 1002 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); 1003 if (!HasShift) return SDValue(); 1004 return CurDAG->getTargetConstant(ShiftImm, MVT::i32); 1005}]>; 1006 1007// Vector immediates shift operands which accept LSL and MSL 1008// shift operators with shift value in the range of 0, 8, 16, 24 (LSL), 1009// or 0, 8 (LSLH) or 8, 16 (MSL). 1010defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">; 1011defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">; 1012// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24 1013defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">; 1014 1015multiclass neon_mov_imm_shift_operands<string PREFIX, 1016 string HALF, string ISHALF, code pred> 1017{ 1018 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM> 1019 { 1020 let PrintMethod = 1021 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">"; 1022 let DecoderMethod = 1023 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">"; 1024 let ParserMatchClass = 1025 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand"); 1026 } 1027} 1028 1029defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{ 1030 unsigned ShiftImm; 1031 unsigned ShiftOnesIn; 1032 unsigned HasShift = 1033 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); 1034 return (HasShift && !ShiftOnesIn); 1035}]>; 1036 1037defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{ 1038 unsigned ShiftImm; 1039 unsigned ShiftOnesIn; 1040 unsigned HasShift = 1041 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); 1042 return (HasShift && ShiftOnesIn); 1043}]>; 1044 1045defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ 1046 unsigned ShiftImm; 1047 unsigned ShiftOnesIn; 
1048 unsigned HasShift = 1049 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); 1050 return (HasShift && !ShiftOnesIn); 1051}]>; 1052 1053def neon_uimm1_asmoperand : AsmOperandClass 1054{ 1055 let Name = "UImm1"; 1056 let PredicateMethod = "isUImm<1>"; 1057 let RenderMethod = "addImmOperands"; 1058} 1059 1060def neon_uimm2_asmoperand : AsmOperandClass 1061{ 1062 let Name = "UImm2"; 1063 let PredicateMethod = "isUImm<2>"; 1064 let RenderMethod = "addImmOperands"; 1065} 1066 1067def neon_uimm8_asmoperand : AsmOperandClass 1068{ 1069 let Name = "UImm8"; 1070 let PredicateMethod = "isUImm<8>"; 1071 let RenderMethod = "addImmOperands"; 1072} 1073 1074def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> { 1075 let ParserMatchClass = neon_uimm8_asmoperand; 1076 let PrintMethod = "printUImmHexOperand"; 1077} 1078 1079def neon_uimm64_mask_asmoperand : AsmOperandClass 1080{ 1081 let Name = "NeonUImm64Mask"; 1082 let PredicateMethod = "isNeonUImm64Mask"; 1083 let RenderMethod = "addNeonUImm64MaskOperands"; 1084} 1085 1086// MCOperand for 64-bit bytemask with each byte having only the 1087// value 0x00 and 0xff is encoded as an unsigned 8-bit value 1088def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> { 1089 let ParserMatchClass = neon_uimm64_mask_asmoperand; 1090 let PrintMethod = "printNeonUImm64MaskOperand"; 1091} 1092 1093multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op, 1094 SDPatternOperator opnode> 1095{ 1096 // shift zeros, per word 1097 def _2S : NeonI_1VModImm<0b0, op, 1098 (outs VPR64:$Rd), 1099 (ins neon_uimm8:$Imm, 1100 neon_mov_imm_LSL_operand:$Simm), 1101 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), 1102 [(set (v2i32 VPR64:$Rd), 1103 (v2i32 (opnode (timm:$Imm), 1104 (neon_mov_imm_LSL_operand:$Simm))))], 1105 NoItinerary> { 1106 bits<2> Simm; 1107 let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; 1108 } 1109 1110 def _4S : NeonI_1VModImm<0b1, op, 1111 (outs VPR128:$Rd), 1112 (ins neon_uimm8:$Imm, 1113 
neon_mov_imm_LSL_operand:$Simm), 1114 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), 1115 [(set (v4i32 VPR128:$Rd), 1116 (v4i32 (opnode (timm:$Imm), 1117 (neon_mov_imm_LSL_operand:$Simm))))], 1118 NoItinerary> { 1119 bits<2> Simm; 1120 let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; 1121 } 1122 1123 // shift zeros, per halfword 1124 def _4H : NeonI_1VModImm<0b0, op, 1125 (outs VPR64:$Rd), 1126 (ins neon_uimm8:$Imm, 1127 neon_mov_imm_LSLH_operand:$Simm), 1128 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), 1129 [(set (v4i16 VPR64:$Rd), 1130 (v4i16 (opnode (timm:$Imm), 1131 (neon_mov_imm_LSLH_operand:$Simm))))], 1132 NoItinerary> { 1133 bit Simm; 1134 let cmode = {0b1, 0b0, Simm, 0b0}; 1135 } 1136 1137 def _8H : NeonI_1VModImm<0b1, op, 1138 (outs VPR128:$Rd), 1139 (ins neon_uimm8:$Imm, 1140 neon_mov_imm_LSLH_operand:$Simm), 1141 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), 1142 [(set (v8i16 VPR128:$Rd), 1143 (v8i16 (opnode (timm:$Imm), 1144 (neon_mov_imm_LSLH_operand:$Simm))))], 1145 NoItinerary> { 1146 bit Simm; 1147 let cmode = {0b1, 0b0, Simm, 0b0}; 1148 } 1149} 1150 1151multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op, 1152 SDPatternOperator opnode, 1153 SDPatternOperator neonopnode> 1154{ 1155 let Constraints = "$src = $Rd" in { 1156 // shift zeros, per word 1157 def _2S : NeonI_1VModImm<0b0, op, 1158 (outs VPR64:$Rd), 1159 (ins VPR64:$src, neon_uimm8:$Imm, 1160 neon_mov_imm_LSL_operand:$Simm), 1161 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), 1162 [(set (v2i32 VPR64:$Rd), 1163 (v2i32 (opnode (v2i32 VPR64:$src), 1164 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm, 1165 neon_mov_imm_LSL_operand:$Simm)))))))], 1166 NoItinerary> { 1167 bits<2> Simm; 1168 let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; 1169 } 1170 1171 def _4S : NeonI_1VModImm<0b1, op, 1172 (outs VPR128:$Rd), 1173 (ins VPR128:$src, neon_uimm8:$Imm, 1174 neon_mov_imm_LSL_operand:$Simm), 1175 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), 1176 [(set (v4i32 VPR128:$Rd), 1177 (v4i32 (opnode (v4i32 VPR128:$src), 
1178 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm, 1179 neon_mov_imm_LSL_operand:$Simm)))))))], 1180 NoItinerary> { 1181 bits<2> Simm; 1182 let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; 1183 } 1184 1185 // shift zeros, per halfword 1186 def _4H : NeonI_1VModImm<0b0, op, 1187 (outs VPR64:$Rd), 1188 (ins VPR64:$src, neon_uimm8:$Imm, 1189 neon_mov_imm_LSLH_operand:$Simm), 1190 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), 1191 [(set (v4i16 VPR64:$Rd), 1192 (v4i16 (opnode (v4i16 VPR64:$src), 1193 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm, 1194 neon_mov_imm_LSL_operand:$Simm)))))))], 1195 NoItinerary> { 1196 bit Simm; 1197 let cmode = {0b1, 0b0, Simm, 0b1}; 1198 } 1199 1200 def _8H : NeonI_1VModImm<0b1, op, 1201 (outs VPR128:$Rd), 1202 (ins VPR128:$src, neon_uimm8:$Imm, 1203 neon_mov_imm_LSLH_operand:$Simm), 1204 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), 1205 [(set (v8i16 VPR128:$Rd), 1206 (v8i16 (opnode (v8i16 VPR128:$src), 1207 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm, 1208 neon_mov_imm_LSL_operand:$Simm)))))))], 1209 NoItinerary> { 1210 bit Simm; 1211 let cmode = {0b1, 0b0, Simm, 0b1}; 1212 } 1213 } 1214} 1215 1216multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op, 1217 SDPatternOperator opnode> 1218{ 1219 // shift ones, per word 1220 def _2S : NeonI_1VModImm<0b0, op, 1221 (outs VPR64:$Rd), 1222 (ins neon_uimm8:$Imm, 1223 neon_mov_imm_MSL_operand:$Simm), 1224 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), 1225 [(set (v2i32 VPR64:$Rd), 1226 (v2i32 (opnode (timm:$Imm), 1227 (neon_mov_imm_MSL_operand:$Simm))))], 1228 NoItinerary> { 1229 bit Simm; 1230 let cmode = {0b1, 0b1, 0b0, Simm}; 1231 } 1232 1233 def _4S : NeonI_1VModImm<0b1, op, 1234 (outs VPR128:$Rd), 1235 (ins neon_uimm8:$Imm, 1236 neon_mov_imm_MSL_operand:$Simm), 1237 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), 1238 [(set (v4i32 VPR128:$Rd), 1239 (v4i32 (opnode (timm:$Imm), 1240 (neon_mov_imm_MSL_operand:$Simm))))], 1241 NoItinerary> { 1242 bit Simm; 1243 let cmode = {0b1, 0b1, 0b0, Simm}; 1244 } 1245} 

// Vector Move Immediate Shifted
let isReMaterializable = 1 in {
defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Shifted
let isReMaterializable = 1 in {
defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
}

// Vector Bitwise Bit Clear (AND NOT) - immediate
let isReMaterializable = 1 in {
defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
                                                         and, Neon_mvni>;
}

// Vector Bitwise OR - immediate
let isReMaterializable = 1 in {
defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
                                                         or, Neon_movi>;
}

// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
// BIC immediate instructions selection requires additional patterns to
// transform Neon_movi operands into BIC immediate operands

def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
  uint64_t OpCmode = N->getZExtValue();
  unsigned ShiftImm;
  unsigned ShiftOnesIn;
  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
  // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1.
  // Transform encoded shift amount 0 to 1 and 1 to 0.
  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
}]>;

def neon_mov_imm_LSLH_transform_operand
  : ImmLeaf<i32, [{
    unsigned ShiftImm;
    unsigned ShiftOnesIn;
    unsigned HasShift =
      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
    return (HasShift && !ShiftOnesIn); }],
  neon_mov_imm_LSLH_transform_XFORM>;

// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
def : Pat<(v4i16 (and VPR64:$src,
            (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_4H VPR64:$src, 0,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;

// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
def : Pat<(v8i16 (and VPR128:$src,
            (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
          (BICvi_lsl_8H VPR128:$src, 0,
            neon_mov_imm_LSLH_transform_operand:$Simm)>;


// Match (opnode $src, (neonopnode imm, shift)) viewed through a bitconvert at
// every 64-bit and 128-bit integer vector type, selecting the per-halfword
// immediate instruction INST4H/INST8H.
multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
                                   SDPatternOperator neonopnode,
                                   Instruction INST4H,
                                   Instruction INST8H> {
  def : Pat<(v8i8 (opnode VPR64:$src,
                    (bitconvert(v4i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v1i64 (opnode VPR64:$src,
                    (bitconvert(v4i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST4H VPR64:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;

  def : Pat<(v16i8 (opnode VPR128:$src,
                    (bitconvert(v8i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST8H VPR128:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v4i32 (opnode VPR128:$src,
                    (bitconvert(v8i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST8H VPR128:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
  def : Pat<(v2i64 (opnode VPR128:$src,
                    (bitconvert(v8i16 (neonopnode timm:$Imm,
                      neon_mov_imm_LSLH_operand:$Simm))))),
            (INST8H VPR128:$src, neon_uimm8:$Imm,
              neon_mov_imm_LSLH_operand:$Simm)>;
}

// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate.
// BIC computes src & ~imm, so the DAG operation that selects to it must be
// 'and' (matching the explicit v4i16/v8i16 BIC patterns above and the
// BICvi_lsl definition); 'or' could never correspond to BIC.
defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;

// Additional patterns for Vector Bitwise OR - immediate
defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;


// Vector Move Immediate Masked
let isReMaterializable = 1 in {
defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
}

// Vector Move Inverted Immediate Masked
let isReMaterializable = 1 in {
defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}

// Assembly alias accepting the shifted-immediate form with the shift omitted,
// mapped onto the real instruction with a zero shift amount.
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
                                Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
                  (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;

// Aliases for Vector Move Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;

// Aliases for Vector Move Inverted Immediate Shifted
def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;

// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1376def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>; 1377def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>; 1378def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>; 1379def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>; 1380 1381// Aliases for Vector Bitwise OR - immedidate 1382def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>; 1383def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>; 1384def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>; 1385def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>; 1386 1387// Vector Move Immediate - per byte 1388let isReMaterializable = 1 in { 1389def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0, 1390 (outs VPR64:$Rd), (ins neon_uimm8:$Imm), 1391 "movi\t$Rd.8b, $Imm", 1392 [(set (v8i8 VPR64:$Rd), 1393 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], 1394 NoItinerary> { 1395 let cmode = 0b1110; 1396} 1397 1398def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, 1399 (outs VPR128:$Rd), (ins neon_uimm8:$Imm), 1400 "movi\t$Rd.16b, $Imm", 1401 [(set (v16i8 VPR128:$Rd), 1402 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], 1403 NoItinerary> { 1404 let cmode = 0b1110; 1405} 1406} 1407 1408// Vector Move Immediate - bytemask, per double word 1409let isReMaterializable = 1 in { 1410def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, 1411 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm), 1412 "movi\t $Rd.2d, $Imm", 1413 [(set (v2i64 VPR128:$Rd), 1414 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))], 1415 NoItinerary> { 1416 let cmode = 0b1110; 1417} 1418} 1419 1420// Vector Move Immediate - bytemask, one doubleword 1421 1422let isReMaterializable = 1 in { 1423def MOVIdi : NeonI_1VModImm<0b0, 0b1, 1424 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm), 1425 "movi\t $Rd, $Imm", 1426 [(set (v1i64 FPR64:$Rd), 1427 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], 1428 NoItinerary> { 1429 let cmode = 0b1110; 1430} 1431} 1432 1433// Vector Floating Point Move Immediate 
1434 1435class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy, 1436 Operand immOpType, bit q, bit op> 1437 : NeonI_1VModImm<q, op, 1438 (outs VPRC:$Rd), (ins immOpType:$Imm), 1439 "fmov\t$Rd" # asmlane # ", $Imm", 1440 [(set (OpTy VPRC:$Rd), 1441 (OpTy (Neon_fmovi (timm:$Imm))))], 1442 NoItinerary> { 1443 let cmode = 0b1111; 1444 } 1445 1446let isReMaterializable = 1 in { 1447def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>; 1448def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; 1449def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; 1450} 1451 1452// Vector Shift (Immediate) 1453// Immediate in [0, 63] 1454def imm0_63 : Operand<i32> { 1455 let ParserMatchClass = uimm6_asmoperand; 1456} 1457 1458// Shift Right/Left Immediate - The immh:immb field of these shifts are encoded 1459// as follows: 1460// 1461// Offset Encoding 1462// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0> 1463// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0> 1464// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0> 1465// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0> 1466// 1467// The shift right immediate amount, in the range 1 to element bits, is computed 1468// as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0 1469// to element bits - 1, is computed as UInt(immh:immb) - Offset. 
1470 1471class shr_imm_asmoperands<string OFFSET> : AsmOperandClass { 1472 let Name = "ShrImm" # OFFSET; 1473 let RenderMethod = "addImmOperands"; 1474 let DiagnosticType = "ShrImm" # OFFSET; 1475} 1476 1477class shr_imm<string OFFSET> : Operand<i32> { 1478 let EncoderMethod = "getShiftRightImm" # OFFSET; 1479 let DecoderMethod = "DecodeShiftRightImm" # OFFSET; 1480 let ParserMatchClass = 1481 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand"); 1482} 1483 1484def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; 1485def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; 1486def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; 1487def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; 1488 1489def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>; 1490def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>; 1491def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>; 1492def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>; 1493 1494class shl_imm_asmoperands<string OFFSET> : AsmOperandClass { 1495 let Name = "ShlImm" # OFFSET; 1496 let RenderMethod = "addImmOperands"; 1497 let DiagnosticType = "ShlImm" # OFFSET; 1498} 1499 1500class shl_imm<string OFFSET> : Operand<i32> { 1501 let EncoderMethod = "getShiftLeftImm" # OFFSET; 1502 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; 1503 let ParserMatchClass = 1504 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand"); 1505} 1506 1507def shl_imm8_asmoperand : shl_imm_asmoperands<"8">; 1508def shl_imm16_asmoperand : shl_imm_asmoperands<"16">; 1509def shl_imm32_asmoperand : shl_imm_asmoperands<"32">; 1510def shl_imm64_asmoperand : shl_imm_asmoperands<"64">; 1511 1512def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>; 1513def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>; 1514def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>; 1515def shl_imm64 : 
shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>; 1516 1517class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T, 1518 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> 1519 : NeonI_2VShiftImm<q, u, opcode, 1520 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), 1521 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", 1522 [(set (Ty VPRC:$Rd), 1523 (Ty (OpNode (Ty VPRC:$Rn), 1524 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))], 1525 NoItinerary>; 1526 1527multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> { 1528 // 64-bit vector types. 1529 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> { 1530 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx 1531 } 1532 1533 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> { 1534 let Inst{22-20} = 0b001; // immh:immb = 001xxxx 1535 } 1536 1537 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> { 1538 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 1539 } 1540 1541 // 128-bit vector types. 
1542 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> { 1543 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx 1544 } 1545 1546 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> { 1547 let Inst{22-20} = 0b001; // immh:immb = 001xxxx 1548 } 1549 1550 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> { 1551 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 1552 } 1553 1554 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> { 1555 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 1556 } 1557} 1558 1559multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> { 1560 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, 1561 OpNode> { 1562 let Inst{22-19} = 0b0001; 1563 } 1564 1565 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, 1566 OpNode> { 1567 let Inst{22-20} = 0b001; 1568 } 1569 1570 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, 1571 OpNode> { 1572 let Inst{22-21} = 0b01; 1573 } 1574 1575 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, 1576 OpNode> { 1577 let Inst{22-19} = 0b0001; 1578 } 1579 1580 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, 1581 OpNode> { 1582 let Inst{22-20} = 0b001; 1583 } 1584 1585 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, 1586 OpNode> { 1587 let Inst{22-21} = 0b01; 1588 } 1589 1590 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, 1591 OpNode> { 1592 let Inst{22} = 0b1; 1593 } 1594} 1595 1596// Shift left 1597defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; 1598 1599// Shift right 1600defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; 1601defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; 1602 1603def Neon_High16B : PatFrag<(ops node:$in), 1604 (extract_subvector (v16i8 node:$in), (iPTR 8))>; 1605def Neon_High8H : PatFrag<(ops 
node:$in), 1606 (extract_subvector (v8i16 node:$in), (iPTR 4))>; 1607def Neon_High4S : PatFrag<(ops node:$in), 1608 (extract_subvector (v4i32 node:$in), (iPTR 2))>; 1609def Neon_High2D : PatFrag<(ops node:$in), 1610 (extract_subvector (v2i64 node:$in), (iPTR 1))>; 1611def Neon_High4float : PatFrag<(ops node:$in), 1612 (extract_subvector (v4f32 node:$in), (iPTR 2))>; 1613def Neon_High2double : PatFrag<(ops node:$in), 1614 (extract_subvector (v2f64 node:$in), (iPTR 1))>; 1615 1616def Neon_Low16B : PatFrag<(ops node:$in), 1617 (v8i8 (extract_subvector (v16i8 node:$in), 1618 (iPTR 0)))>; 1619def Neon_Low8H : PatFrag<(ops node:$in), 1620 (v4i16 (extract_subvector (v8i16 node:$in), 1621 (iPTR 0)))>; 1622def Neon_Low4S : PatFrag<(ops node:$in), 1623 (v2i32 (extract_subvector (v4i32 node:$in), 1624 (iPTR 0)))>; 1625def Neon_Low2D : PatFrag<(ops node:$in), 1626 (v1i64 (extract_subvector (v2i64 node:$in), 1627 (iPTR 0)))>; 1628def Neon_Low4float : PatFrag<(ops node:$in), 1629 (v2f32 (extract_subvector (v4f32 node:$in), 1630 (iPTR 0)))>; 1631def Neon_Low2double : PatFrag<(ops node:$in), 1632 (v1f64 (extract_subvector (v2f64 node:$in), 1633 (iPTR 0)))>; 1634 1635class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT, 1636 string SrcT, ValueType DestTy, ValueType SrcTy, 1637 Operand ImmTy, SDPatternOperator ExtOp> 1638 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), 1639 (ins VPR64:$Rn, ImmTy:$Imm), 1640 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", 1641 [(set (DestTy VPR128:$Rd), 1642 (DestTy (shl 1643 (DestTy (ExtOp (SrcTy VPR64:$Rn))), 1644 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], 1645 NoItinerary>; 1646 1647class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT, 1648 string SrcT, ValueType DestTy, ValueType SrcTy, 1649 int StartIndex, Operand ImmTy, 1650 SDPatternOperator ExtOp, PatFrag getTop> 1651 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), 1652 (ins VPR128:$Rn, ImmTy:$Imm), 1653 asmop # "2\t$Rd." 
# DestT # ", $Rn." # SrcT # ", $Imm",
                     [(set (DestTy VPR128:$Rd),
                        (DestTy (shl
                          (DestTy (ExtOp
                            (SrcTy (getTop VPR128:$Rn)))),
                          (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
                     NoItinerary>;

// Shift-left-long by immediate, for every element size, plus the high-half
// ("2") forms and zero-immediate extension patterns.
multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                         SDNode ExtOp> {
  // 64-bit vector types.
  def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
                         shl_imm8, ExtOp> {
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
  }

  def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
                         shl_imm16, ExtOp> {
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
  }

  def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
                         shl_imm32, ExtOp> {
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
  }

  // 128-bit vector types
  def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
                              8, shl_imm8, ExtOp, Neon_High16B> {
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
  }

  def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
                             4, shl_imm16, ExtOp, Neon_High8H> {
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
  }

  def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
                             2, shl_imm32, ExtOp, Neon_High4S> {
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
  }

  // Use other patterns to match when the immediate is 0.
  // A plain extension is the same as a shift-left-long by 0, so select the
  // instruction with a zero immediate.
  def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;

  def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;

  def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;

  def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
            (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
}

// Shift left long
defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;

// Rounding/Saturating shift
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn."
# T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary>;

// shift right (vector by immediate)
multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
                           SDPatternOperator OpNode> {
  def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;
  }
}

// shift left (vector by immediate)
multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
                          SDPatternOperator OpNode> {
  // 64-bit vector types.
  def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  // 128-bit vector types.
  def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        OpNode> {
    let Inst{22} = 0b1;
  }
}

// Rounding shift right
defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
                                int_aarch64_neon_vsrshr>;
defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
                                int_aarch64_neon_vurshr>;

// Saturating shift left unsigned
defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;

// Saturating shift left
defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;

// Shift by immediate and accumulate: Rd = src + (Rn OpNode Imm).
// $src is tied to $Rd (see Constraints below).
class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDNode OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn."
# T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                        (Ty (OpNode (Ty VPRC:$Rn),
                          (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}

// Shift Right accumulate
multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
  def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        OpNode> {
    let Inst{22} = 0b1;
  }
}

// Shift right and accumulate
defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;

// Rounding shift accumulate
class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
                    RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                    SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn."
# T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                        (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}

// Rounding shift right by immediate and accumulate, matched through the
// rounding-shift intrinsics.
multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
                             SDPatternOperator OpNode> {
  def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                          OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                           OpNode> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                          OpNode> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                          OpNode> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                          OpNode> {
    let Inst{22} = 0b1;
  }
}

// Rounding shift right and accumulate
defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;

// Shift insert by immediate
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn."
# T # ", $Imm",
                     [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
}

// shift left insert (vector by immediate)
multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

  // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        int_aarch64_neon_vsli> {
    let Inst{22} = 0b1;
  }
}

// shift right insert (vector by immediate)
multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

  // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        int_aarch64_neon_vsri> {
    let Inst{22} = 0b1;
  }
}

// Shift left and insert
defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;

// Shift right and insert
defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;

// Shift right narrow: no ISel pattern here; selection is done by the
// narrowing patterns and intrinsic patterns further below.
class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                    string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary>;

// High-half ("2") variant of shift right narrow; $src is tied to $Rd.
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn."
# SrcT # ", $Imm",
                     [], NoItinerary> {
  let Constraints = "$src = $Rd";
}

// Shift right narrow (vector by immediate).
// NOTE(review): the previous comment here said "left long shift by
// immediate", which did not match the multiclass below.
multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
  def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
    let Inst{22-21} = 0b01;
  }

  // Shift Narrow High
  def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
                              shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
                             shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
                             shr_imm32> {
    let Inst{22-21} = 0b01;
  }
}

// Shift right narrow
defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;

// Shift right narrow (prefix Q is saturating, prefix R is rounding)
defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;

// PatFrags combining two half-width vectors into one full-width vector.
def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2i64 (concat_vectors (v1i64 node:$Rm),
                                                     (v1i64 node:$Rn)))>;
def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
                              (v8i16 (concat_vectors (v4i16 node:$Rm),
                                                     (v4i16 node:$Rn)))>;
def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4i32 (concat_vectors (v2i32 node:$Rm),
                                                     (v2i32 node:$Rn)))>;
def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4f32 (concat_vectors (v2f32 node:$Rm),
                                                     (v2f32 node:$Rn)))>;
def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2f64 (concat_vectors (v1f64 node:$Rm),
                                                     (v1f64 node:$Rn)))>;

// Vector logical (srl) / arithmetic (sra) shift right by a splatted
// immediate.
def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (srl (v8i16 node:$lhs),
                                         (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (srl (v4i32 node:$lhs),
                                         (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (srl (v2i64 node:$lhs),
                                         (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (sra (v8i16 node:$lhs),
                                         (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (sra (v4i32 node:$lhs),
                                         (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (sra (v2i64 node:$lhs),
                                         (v2i64 (Neon_vdup (i32 node:$rhs)))))>;

// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
multiclass Neon_shiftNarrow_patterns<string shr> {
  def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
                           (i32 shr_imm8:$Imm)))),
            (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
                            (i32 shr_imm16:$Imm)))),
            (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
                            (i32 shr_imm32:$Imm)))),
            (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;

  // High-half forms: the existing low half ($src) is placed in the bottom of
  // a 128-bit register and the narrowed result lands in the top half.
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
                VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
            (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
                         VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
                VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
            (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
                VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
            (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
}

// Saturating/rounding shift-right-narrow variants are matched through their
// intrinsics; `prefix` names the instruction family to select.
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
  def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
            (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
            (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
            (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;

  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
              (v1i64 (bitconvert (v8i8
                (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
            (!cast<Instruction>(prefix # "_16B")
              (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
              (v1i64 (bitconvert (v4i16
                (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
            (!cast<Instruction>(prefix # "_8H")
              (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
              (v1i64 (bitconvert (v2i32
                (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
            (!cast<Instruction>(prefix # "_4S")
              (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, imm:$Imm)>;
}

defm : Neon_shiftNarrow_patterns<"lshr">;
defm : Neon_shiftNarrow_patterns<"ashr">;

defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;

// Convert between fixed-point and floating-point
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
                Operand ImmTy, SDPatternOperator IntOp>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
                     NoItinerary>;

// Fixed-point to floating-point conversions.
multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}

// Floating-point to fixed-point conversions.
multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}

// Convert fixed-point to floating-point
defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
                                   int_arm_neon_vcvtfxs2fp>;
defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
                                   int_arm_neon_vcvtfxu2fp>;

// Convert floating-point to fixed-point
defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
                                   int_arm_neon_vcvtfp2fxs>;
defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
                                   int_arm_neon_vcvtfp2fxu>;

// PatFrags matching an extension of the high half of a 128-bit vector.
multiclass Neon_sshll2_0<SDNode ext>
{
  def _v8i8 : PatFrag<(ops node:$Rn),
                      (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
  def _v4i16 : PatFrag<(ops node:$Rn),
                       (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
  def _v2i32 : PatFrag<(ops node:$Rn),
                       (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
}

defm NI_sext_high : Neon_sshll2_0<sext>;
defm NI_zext_high : Neon_sshll2_0<zext>;


//===----------------------------------------------------------------------===//
// Multiclasses for NeonI_Across
//===----------------------------------------------------------------------===//

// Variant 1

multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
  def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
                            (outs FPR16:$Rd), (ins VPR64:$Rn),
                            asmop # "\t$Rd, $Rn.8b",
                            [(set (v1i16 FPR16:$Rd),
                               (v1i16 (opnode (v8i8 VPR64:$Rn))))],
                            NoItinerary>;

  def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                             (outs FPR16:$Rd), (ins VPR128:$Rn),
                             asmop # "\t$Rd, $Rn.16b",
                             [(set (v1i16 FPR16:$Rd),
                                (v1i16 (opnode (v16i8 VPR128:$Rn))))],
                             NoItinerary>;

  def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
                            (outs FPR32:$Rd), (ins VPR64:$Rn),
                            asmop # "\t$Rd, $Rn.4h",
                            [(set (v1i32 FPR32:$Rd),
                               (v1i32 (opnode (v4i16 VPR64:$Rn))))],
                            NoItinerary>;

  def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
                            (outs
FPR32:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd, $Rn.8h",
                            [(set (v1i32 FPR32:$Rd),
                               (v1i32 (opnode (v8i16 VPR128:$Rn))))],
                            NoItinerary>;

  // _1d2s doesn't exist!

  def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
                            (outs FPR64:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd, $Rn.4s",
                            [(set (v1i64 FPR64:$Rd),
                               (v1i64 (opnode (v4i32 VPR128:$Rn))))],
                            NoItinerary>;
}

defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;

// Variant 2

multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
  def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
                            (outs FPR8:$Rd), (ins VPR64:$Rn),
                            asmop # "\t$Rd, $Rn.8b",
                            [(set (v1i8 FPR8:$Rd),
                               (v1i8 (opnode (v8i8 VPR64:$Rn))))],
                            NoItinerary>;

  def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                             (outs FPR8:$Rd), (ins VPR128:$Rn),
                             asmop # "\t$Rd, $Rn.16b",
                             [(set (v1i8 FPR8:$Rd),
                                (v1i8 (opnode (v16i8 VPR128:$Rn))))],
                             NoItinerary>;

  def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
                            (outs FPR16:$Rd), (ins VPR64:$Rn),
                            asmop # "\t$Rd, $Rn.4h",
                            [(set (v1i16 FPR16:$Rd),
                               (v1i16 (opnode (v4i16 VPR64:$Rn))))],
                            NoItinerary>;

  def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
                            (outs FPR16:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd, $Rn.8h",
                            [(set (v1i16 FPR16:$Rd),
                               (v1i16 (opnode (v8i16 VPR128:$Rn))))],
                            NoItinerary>;

  // _1s2s doesn't exist!

  def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
                            (outs FPR32:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd, $Rn.4s",
                            [(set (v1i32 FPR32:$Rd),
                               (v1i32 (opnode (v4i32 VPR128:$Rn))))],
                            NoItinerary>;
}

defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;

defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;

defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;

// Variant 3

multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                            string asmop, SDPatternOperator opnode> {
  def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
                            (outs FPR32:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd, $Rn.4s",
                            [(set (v1f32 FPR32:$Rd),
                               (v1f32 (opnode (v4f32 VPR128:$Rn))))],
                            NoItinerary>;
}

defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
                                int_aarch64_neon_vmaxnmv>;
defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
                                int_aarch64_neon_vminnmv>;

defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
                              int_aarch64_neon_vmaxv>;
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
                              int_aarch64_neon_vminv>;

// The following are for the instruction class (Perm)

class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                    string asmop, RegisterOperand OpVPR, string OpS,
                    SDPatternOperator opnode, ValueType Ty>
  : NeonI_Perm<q, size, opcode,
               (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm."
# OpS,
               [(set (Ty OpVPR:$Rd),
                  (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
               NoItinerary>;

// One permute instruction per integer vector type.
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                          SDPatternOperator opnode> {
  def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
                          VPR64, "8b", opnode, v8i8>;
  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
                           VPR128, "16b",opnode, v16i8>;
  def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
                          VPR64, "4h", opnode, v4i16>;
  def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
                          VPR128, "8h", opnode, v8i16>;
  def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
                          VPR64, "2s", opnode, v2i32>;
  def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
                          VPR128, "4s", opnode, v4i32>;
  def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
                          VPR128, "2d", opnode, v2i64>;
}

defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;

// Extra patterns selecting the integer permute instructions for
// floating-point vector types.
multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
  def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;

  def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;

  def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
}

defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
defm :
NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;

// The following are for the instruction class (3V Diff)

// normal long/long2 pattern
class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;

// Signed long operations: both operands are sign-extended before opnode.
multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
                        string asmop, SDPatternOperator opnode,
                        bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, sext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, sext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, sext, VPR64, v2i64, v2i32>;
  }
}

// Signed long2 operations: the high halves of both operands, sign-extended.
multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                           opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                           opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}

// Unsigned long operations: both operands are zero-extended before opnode.
multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, zext, VPR64,
v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, zext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, zext, VPR64, v2i64, v2i32>;
  }
}

// Unsigned long2 operations: the high halves of both operands, zero-extended.
multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                           opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                           opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}

defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;

defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;

defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;

defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;

// normal wide/wide2 pattern
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm."
# OpS, 2529 [(set (ResTy VPR128:$Rd), 2530 (ResTy (opnode (ResTy VPR128:$Rn), 2531 (ResTy (ext (OpTy OpVPR:$Rm))))))], 2532 NoItinerary>; 2533 2534multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop, 2535 SDPatternOperator opnode> { 2536 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", 2537 opnode, sext, VPR64, v8i16, v8i8>; 2538 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", 2539 opnode, sext, VPR64, v4i32, v4i16>; 2540 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", 2541 opnode, sext, VPR64, v2i64, v2i32>; 2542} 2543 2544defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; 2545defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; 2546 2547multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop, 2548 SDPatternOperator opnode> { 2549 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", 2550 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; 2551 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", 2552 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; 2553 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", 2554 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; 2555} 2556 2557defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; 2558defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; 2559 2560multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop, 2561 SDPatternOperator opnode> { 2562 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", 2563 opnode, zext, VPR64, v8i16, v8i8>; 2564 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", 2565 opnode, zext, VPR64, v4i32, v4i16>; 2566 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", 2567 opnode, zext, VPR64, v2i64, v2i32>; 2568} 2569 2570defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; 2571defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; 2572 2573multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop, 2574 SDPatternOperator opnode> { 2575 def 
_8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", 2576 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; 2577 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", 2578 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; 2579 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", 2580 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; 2581} 2582 2583defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; 2584defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; 2585 2586// Get the high half part of the vector element. 2587multiclass NeonI_get_high { 2588 def _8h : PatFrag<(ops node:$Rn), 2589 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), 2590 (v8i16 (Neon_vdup (i32 8)))))))>; 2591 def _4s : PatFrag<(ops node:$Rn), 2592 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), 2593 (v4i32 (Neon_vdup (i32 16)))))))>; 2594 def _2d : PatFrag<(ops node:$Rn), 2595 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), 2596 (v2i64 (Neon_vdup (i32 32)))))))>; 2597} 2598 2599defm NI_get_hi : NeonI_get_high; 2600 2601// pattern for addhn/subhn with 2 operands 2602class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode, 2603 string asmop, string ResS, string OpS, 2604 SDPatternOperator opnode, SDPatternOperator get_hi, 2605 ValueType ResTy, ValueType OpTy> 2606 : NeonI_3VDiff<q, u, size, opcode, 2607 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm), 2608 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, 2609 [(set (ResTy VPR64:$Rd), 2610 (ResTy (get_hi 2611 (OpTy (opnode (OpTy VPR128:$Rn), 2612 (OpTy VPR128:$Rm))))))], 2613 NoItinerary>; 2614 2615multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop, 2616 SDPatternOperator opnode, bit Commutable = 0> { 2617 let isCommutable = Commutable in { 2618 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", 2619 opnode, NI_get_hi_8h, v8i8, v8i16>; 2620 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", 2621 opnode, NI_get_hi_4s, v4i16, v4i32>; 2622 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", 2623 opnode, NI_get_hi_2d, v2i32, v2i64>; 2624 } 2625} 2626 2627defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; 2628defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; 2629 2630// pattern for operation with 2 operands 2631class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode, 2632 string asmop, string ResS, string OpS, 2633 SDPatternOperator opnode, 2634 RegisterOperand ResVPR, RegisterOperand OpVPR, 2635 ValueType ResTy, ValueType OpTy> 2636 : NeonI_3VDiff<q, u, size, opcode, 2637 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), 2638 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, 2639 [(set (ResTy ResVPR:$Rd), 2640 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))], 2641 NoItinerary>; 2642 2643// normal narrow pattern 2644multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop, 2645 SDPatternOperator opnode, bit Commutable = 0> { 2646 let isCommutable = Commutable in { 2647 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", 2648 opnode, VPR64, VPR128, v8i8, v8i16>; 2649 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", 2650 opnode, VPR64, VPR128, v4i16, v4i32>; 2651 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", 2652 opnode, VPR64, VPR128, v2i32, v2i64>; 2653 } 2654} 2655 2656defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; 2657defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; 2658 2659// pattern for acle intrinsic with 3 operands 2660class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode, 2661 string asmop, string ResS, string OpS> 2662 : NeonI_3VDiff<q, u, size, opcode, 2663 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm), 2664 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, 2665 [], NoItinerary> { 2666 let Constraints = "$src = $Rd"; 2667 let neverHasSideEffects = 1; 2668} 2669 2670multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> { 2671 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; 2672 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; 2673 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; 2674} 2675 2676defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; 2677defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; 2678 2679defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; 2680defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; 2681 2682// Patterns have to be separate because there's a SUBREG_TO_REG in the output 2683// part. 
// Match "narrow into the high half": the low half ($src) is kept and the
// narrowed coreop result is inserted as the upper 64 bits, implemented with
// SUBREG_TO_REG to place $src into the low lane of the 128-bit destination.
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
                        SDPatternOperator coreop>
  : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                         (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
                                                           (SrcTy VPR128:$Rm)))))),
        (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, VPR128:$Rm)>;

// addhn2 patterns
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
                        BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
                        BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
                        BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;

// subhn2 patterns
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
                        BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
                        BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
                        BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;

// raddhn2 patterns
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;

// rsubhn2 patterns
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;

// pattern that need to extend result
// The opnode produces a vector at source width (OpSTy); the result is then
// zero-extended to fill the 128-bit destination (used for sabdl/uabdl).
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;

multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR64, v8i16, v8i8, v8i8>;
    def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                               opnode, VPR64, v4i32, v4i16, v4i16>;
    def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                               opnode, VPR64, v2i64, v2i32, v2i32>;
  }
}

defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;

// Wrap 'op' so it is applied to the high halves of both 128-bit operands;
// used to build the *2 ("high") variants of the long instructions below.
multiclass NeonI_Op_High<SDPatternOperator op> {
  def _16B : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v8i8 (Neon_High16B node:$Rn)),
                         (v8i8 (Neon_High16B node:$Rm)))>;
  def _8H : PatFrag<(ops node:$Rn, node:$Rm),
                    (op (v4i16 (Neon_High8H node:$Rn)),
                        (v4i16 (Neon_High8H node:$Rm)))>;
  def _4S : PatFrag<(ops node:$Rn, node:$Rm),
                    (op (v2i32 (Neon_High4S node:$Rn)),
                        (v2i32 (Neon_High4S node:$Rm)))>;
}

defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;

// 'opnode' is the string name of a NeonI_Op_High PatFrag family; the size
// suffix (_16B/_8H/_4S) is appended via !cast for each element size.
multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
                            bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                               !cast<PatFrag>(opnode # "_16B"),
                               VPR128, v8i16, v16i8, v8i8>;
    def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                               !cast<PatFrag>(opnode # "_8H"),
                               VPR128, v4i32, v8i16, v4i16>;
    def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                               !cast<PatFrag>(opnode # "_4S"),
                               VPR128, v2i64, v4i32, v2i32>;
  }
}

defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;

// For pattern that need two operators being chained.
// Accumulating form: $Rd = opnode($src, zext(subop($Rn, $Rm))), with the
// accumulator tied ($src = $Rd); used for saba l/uabal.
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode, SDPatternOperator subop,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode, SDPatternOperator subop>{
  def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, subop, VPR64, v8i16, v8i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, subop, VPR64, v4i32, v4i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, subop, VPR64, v2i64, v2i32, v2i32>;
}

defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
                                  add, int_arm_neon_vabds>;
defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
                                  add, int_arm_neon_vabdu>;

multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
                              SDPatternOperator opnode, string subop> {
  def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                             opnode, !cast<PatFrag>(subop # "_16B"),
                             VPR128, v8i16, v16i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                             opnode, !cast<PatFrag>(subop # "_8H"),
                             VPR128, v4i32, v8i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                             opnode, !cast<PatFrag>(subop # "_4S"),
                             VPR128, v2i64, v4i32, v2i32>;
}

defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
                                    "NI_sabdl_hi">;
defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
                                    "NI_uabdl_hi">;

// Long pattern with 2 operands
multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode, VPR128, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;

class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
                 NoItinerary>;

multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
                                         "NI_smull_hi", 1>;
defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
                                         "NI_umull_hi", 1>;

// Long pattern with 3 operands
// Accumulating long form: opnode takes the tied 128-bit accumulator plus two
// 64-bit sources (used with the Neon_smlal/umlal/smlsl/umlsl fragments).
class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, v8i16, v8i8>;
  def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, v2i64, v2i32>;
}

// Multiply-accumulate fragments: widening multiply folded into add/sub.
def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                           (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                           (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                           (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                           (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;

defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;

// Accumulating form for the *2 variants: $Rd = subop($src, opnode($Rn, $Rm)).
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator subop, SDPatternOperator opnode,
                           RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (subop
                      (ResTy VPR128:$src),
                      (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
                                   SDPatternOperator subop, string opnode> {
  def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                    subop, !cast<PatFrag>(opnode # "_16B"),
                                    VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   subop, !cast<PatFrag>(opnode # "_8H"),
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   subop, !cast<PatFrag>(opnode # "_4S"),
                                   VPR128, v2i64, v4i32>;
}

defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
                                         add, "NI_smull_hi">;
defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
                                         add, "NI_umull_hi">;

defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
                                         sub, "NI_smull_hi">;
defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
                                         sub, "NI_umull_hi">;

// Saturating doubling multiply-accumulate: vqdmull feeding a saturating
// add/sub ('opnode'), only for 4h and 2s element sizes.
multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
                                    SDPatternOperator opnode> {
  def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v2i64, v2i32>;
}

defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
                                           int_arm_neon_vqadds>;
defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
                                           int_arm_neon_vqsubs>;

multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                int_arm_neon_vqdmull, 1>;

multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
                                           "NI_qdmull_hi", 1>;

multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
                                     SDPatternOperator opnode> {
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   opnode, NI_qdmull_hi_8H,
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   opnode, NI_qdmull_hi_4S,
                                   VPR128, v2i64, v4i32>;
}

defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
                                             int_arm_neon_vqadds>;
defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
                                             int_arm_neon_vqsubs>;

// Polynomial multiply: the 1q1d form has no ISel pattern (assembler-only).
multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode, VPR128, VPR64, v8i16, v8i8>;

    def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
                             (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                             asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
                             [], NoItinerary>;
  }
}

defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
// Polynomial multiply on high halves; the 1q2d form has no ISel pattern
// (assembler-only), mirroring NeonI_3VDL_v3 above.
multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;

    def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                             asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
                             [], NoItinerary>;
  }
}

defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
                                         1>;

// End of implementation for instruction class (3V Diff)

// The followings are vector load/store multiple N-element structure
// (class SIMD lselem).

// ld1: load multiple 1-element structure to 1/2/3/4 registers.
// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
// The structure consists of a sequence of sets of N values.
// The first element of the structure is placed in the first lane
// of the first first vector, the second element in the first lane
// of the second vector, and so on.
// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
// the three 64-bit vectors list {BA, DC, FE}.
// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
// 64-bit vectors list {DA, EB, FC}.
// Store instructions store multiple structure to N registers like load.
// Vector load of a full register list; modelled as mayLoad with no other
// side effects, no ISel pattern (selection is done via the Pat defs below).
class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 1, opcode, size,
                   (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                   asmop # "\t$Rt, [$Rn]",
                   [],
                   NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}

// Instantiate one load per arrangement; the 'List' string selects the
// register-list operand family (VOne/VPair/VTriple/VQuad) via !cast.
multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_LDVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_LDVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}

// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;

defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;

defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;

defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;

// Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;

defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;

defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;

// Store counterpart of NeonI_LDVList: list operand is an input, mayStore.
class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 0, opcode, size,
                   (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                   asmop # "\t$Rt, [$Rn]",
                   [],
                   NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
}

multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_STVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_STVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_STVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_STVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_STVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_STVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_STVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}

// Store multiple N-element structures from N registers (N = 1,2,3,4)
defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;

defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;

defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;

defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;

// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;

defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;

defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;

// Select plain vector loads/stores through the single-register LD1/ST1 forms.
def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;

def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;

def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;

def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;

def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;

def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;

def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
          (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
          (ST1_16B GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
          (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
          (ST1_8B GPR64xsp:$addr, VPR64:$value)>;

// End of vector load/store multiple N-element structure(class SIMD lselem)

// The followings are post-index vector load/store multiple N-element
// structure(class SIMD lselem-post)
// Each exactN operand accepts exactly one immediate value — the fixed
// post-index amount implied by the register-list size.
def exact1_asmoperand : AsmOperandClass {
  let Name = "Exact1";
  let PredicateMethod = "isExactImm<1>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
  let ParserMatchClass = exact1_asmoperand;
}

def exact2_asmoperand : AsmOperandClass {
  let Name = "Exact2";
  let PredicateMethod = "isExactImm<2>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
  let ParserMatchClass = exact2_asmoperand;
}

def exact3_asmoperand : AsmOperandClass {
  let Name = "Exact3";
  let PredicateMethod = "isExactImm<3>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
  let ParserMatchClass = exact3_asmoperand;
}

def exact4_asmoperand : AsmOperandClass {
  let Name = "Exact4";
  let PredicateMethod = "isExactImm<4>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
  let ParserMatchClass = exact4_asmoperand;
}

def exact6_asmoperand : AsmOperandClass {
  let Name = "Exact6";
  let PredicateMethod = "isExactImm<6>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
  let ParserMatchClass = exact6_asmoperand;
}

def exact8_asmoperand : AsmOperandClass {
  let Name = "Exact8";
  let PredicateMethod = "isExactImm<8>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
  let ParserMatchClass = exact8_asmoperand;
}

def exact12_asmoperand : AsmOperandClass {
  let Name = "Exact12";
  let PredicateMethod = "isExactImm<12>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
  let ParserMatchClass = exact12_asmoperand;
}

def exact16_asmoperand : AsmOperandClass {
  let Name = "Exact16";
  let PredicateMethod = "isExactImm<16>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
  let ParserMatchClass = exact16_asmoperand;
}

def exact24_asmoperand : AsmOperandClass {
  let Name = "Exact24";
  let PredicateMethod = "isExactImm<24>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
  let ParserMatchClass = exact24_asmoperand;
}

def exact32_asmoperand : AsmOperandClass {
  let Name = "Exact32";
  let PredicateMethod = "isExactImm<32>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
  let ParserMatchClass = exact32_asmoperand;
}

def exact48_asmoperand : AsmOperandClass {
  let Name = "Exact48";
  let PredicateMethod = "isExactImm<48>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
  let ParserMatchClass = exact48_asmoperand;
}

def exact64_asmoperand : AsmOperandClass {
  let Name = "Exact64";
  let PredicateMethod = "isExactImm<64>";
  let RenderMethod = "addImmOperands";
}
def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
  let ParserMatchClass = exact64_asmoperand;
}

// Post-indexed loads: _fixed uses the immediate post-index amount (Rm field
// forced to 0b11111), _register post-indexes by a GPR. Base register is tied
// to the writeback result ($Rn = $wb).
multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
      DecoderMethod = "DecodeVLDSTPostInstruction" in {
    def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
                                     (outs VecList:$Rt, GPR64xsp:$wb),
                                     (ins GPR64xsp:$Rn, ImmTy:$amt),
                                     asmop # "\t$Rt, [$Rn], $amt",
                                     [],
                                     NoItinerary> {
      let Rm = 0b11111;
    }

    def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
                                        (outs VecList:$Rt, GPR64xsp:$wb),
                                        (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                                        asmop # "\t$Rt, [$Rn], $Rm",
                                        [],
                                        NoItinerary>;
  }
}

// ImmTy is the post-index amount for the 64-bit arrangements, ImmTy2 for the
// 128-bit ones (twice the bytes transferred).
multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
                           Operand ImmTy2, string asmop> {
  defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"),
                              ImmTy, asmop>;

  defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              ImmTy, asmop>;

  defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              ImmTy, asmop>;

  defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               ImmTy2, asmop>;

  defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              ImmTy2, asmop>;

  defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              ImmTy2, asmop>;

  defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              ImmTy2, asmop>;
}

// Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
                                 "ld1">;

defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;

defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
                             "ld3">;

defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;

// Post-index load multiple 1-element structures from N consecutive registers
// (N = 2,3,4)
defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
                               "ld1">;
defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
                                   uimm_exact16, "ld1">;

defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
                               "ld1">;
defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
                                   uimm_exact24, "ld1">;

defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
                               "ld1">;
defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
                                   uimm_exact32, "ld1">;

// Store counterpart of NeonI_LDWB_VList: only the writeback register is an
// output; the vector list is an input.
multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
      DecoderMethod = "DecodeVLDSTPostInstruction" in {
    def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
                                     (outs GPR64xsp:$wb),
                                     (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
                                     asmop # "\t$Rt, [$Rn], $amt",
                                     [],
                                     NoItinerary> {
      let Rm = 0b11111;
    }

    def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
                                        (outs GPR64xsp:$wb),
                                        (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
                                        asmop # "\t$Rt, [$Rn], $Rm",
                                        [],
                                        NoItinerary>;
  }
}

multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
                           Operand ImmTy2, string asmop> {
  defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;

  defm _4H : 
NeonI_STWB_VList<0, opcode, 0b01, 3457 !cast<RegisterOperand>(List # "4H_operand"), 3458 ImmTy, asmop>; 3459 3460 defm _2S : NeonI_STWB_VList<0, opcode, 0b10, 3461 !cast<RegisterOperand>(List # "2S_operand"), 3462 ImmTy, asmop>; 3463 3464 defm _16B : NeonI_STWB_VList<1, opcode, 0b00, 3465 !cast<RegisterOperand>(List # "16B_operand"), 3466 ImmTy2, asmop>; 3467 3468 defm _8H : NeonI_STWB_VList<1, opcode, 0b01, 3469 !cast<RegisterOperand>(List # "8H_operand"), 3470 ImmTy2, asmop>; 3471 3472 defm _4S : NeonI_STWB_VList<1, opcode, 0b10, 3473 !cast<RegisterOperand>(List # "4S_operand"), 3474 ImmTy2, asmop>; 3475 3476 defm _2D : NeonI_STWB_VList<1, opcode, 0b11, 3477 !cast<RegisterOperand>(List # "2D_operand"), 3478 ImmTy2, asmop>; 3479} 3480 3481// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) 3482defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">; 3483defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, 3484 "st1">; 3485 3486defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">; 3487 3488defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, 3489 "st3">; 3490 3491defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">; 3492 3493// Post-index load multiple 1-element structures from N consecutive registers 3494// (N = 2,3,4) 3495defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, 3496 "st1">; 3497defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand, 3498 uimm_exact16, "st1">; 3499 3500defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, 3501 "st1">; 3502defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand, 3503 uimm_exact24, "st1">; 3504 3505defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, 3506 "st1">; 3507defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand, 3508 uimm_exact32, "st1">; 3509 
// End of post-index vector load/store multiple N-element structure
// (class SIMD lselem-post)

// The following are vector load/store single N-element structure
// (class SIMD lsone).
//
// "Bare" lane-index operands: printed via printUImmBareOperand, i.e. as a
// plain number with no '#' prefix, as required inside "[lane]" syntax.
def neon_uimm0_bare : Operand<i64>,
                      ImmLeaf<i64, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm1_bare : Operand<i64>,
                      ImmLeaf<i64, [{return Imm < 2;}]> {
  let ParserMatchClass = neon_uimm1_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm2_bare : Operand<i64>,
                      ImmLeaf<i64, [{return Imm < 4;}]> {
  let ParserMatchClass = neon_uimm2_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm3_bare : Operand<i64>,
                      ImmLeaf<i64, [{return Imm < 8;}]> {
  let ParserMatchClass = uimm3_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

def neon_uimm4_bare : Operand<i64>,
                      ImmLeaf<i64, [{return Imm < 16;}]> {
  let ParserMatchClass = uimm4_asmoperand;
  let PrintMethod = "printUImmBareOperand";
}

// Load one structure and replicate it to all lanes of the destination
// register list (LD1R/LD2R/LD3R/LD4R).
class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdOne_Dup<q, r, opcode, size,
                    (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                    asmop # "\t$Rt, [$Rn]",
                    [],
                    NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}

// Instantiate a load-and-replicate instruction for every arrangement.
multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
  def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
                          !cast<RegisterOperand>(List # "1D_operand"), asmop>;

  def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}

// Load single 1-element structure to all lanes of 1 register
defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;

// Load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;


// Select LD1R for a scalar load whose result is immediately splatted
// (Neon_vdup) to every lane of the vector.
class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
                    Instruction INST>
  : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
        (VTy (INST GPR64xsp:$Rn))>;

// Match all LD1R instructions
def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;

def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;

def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;

def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;

def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;

def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;

def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;

def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;


// Vector-list operands in all four element widths for a given list shape.
multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
                                RegisterClass RegList> {
  defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
  defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
  defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
  defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
}

// Special vector list operand of 128-bit vectors with bare layout,
// i.e. only show ".b", ".h", ".s", ".d"
defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;

// Load one structure into a single lane.  The unwritten lanes pass through
// unchanged, so the register list is also an input ($src tied to $Rt).
class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                     Operand ImmOp, string asmop>
  : NeonI_LdStOne_Lane<1, r, op2_1, op0,
                       (outs VList:$Rt),
                       (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
                       asmop # "\t$Rt[$lane], [$Rn]",
                       [],
                       NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
  let hasExtraDefRegAllocReq = 1;
  let Constraints = "$src = $Rt";
}

// Per-lane loads for each element size.  The lane number is scattered over
// the encoding: its top bit goes in Inst{30} and the remaining bits in
// Inst{12-10}, zero-padded for the wider element sizes.  Note _D reuses
// op2_1 = 0b10 and is distinguished from _S by Inst{12-10} = 0b001.
multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
  def _B : NeonI_LDN_Lane<r, 0b00, op0,
                          !cast<RegisterOperand>(List # "B_operand"),
                          neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H : NeonI_LDN_Lane<r, 0b01, op0,
                          !cast<RegisterOperand>(List # "H_operand"),
                          neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S : NeonI_LDN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "S_operand"),
                          neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _D : NeonI_LDN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "D_operand"),
                          neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}

// Load single 1-element structure to one lane of 1 register.
defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;

// Load single N-element structure to one lane of N consecutive registers
// (N = 2,3,4)
defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;

// Select LD1LN for a scalar load inserted into one vector lane.  The
// instruction always operates on a 128-bit register, so for the 64-bit
// (VPR64) types the source is widened with SUBREG_TO_REG and the result
// narrowed back with EXTRACT_SUBREG.
multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
                          Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
                          Instruction INST> {
  def : Pat<(VTy (vector_insert (VTy VPR64:$src),
                 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
            (VTy (EXTRACT_SUBREG
                     (INST GPR64xsp:$Rn,
                           (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                           ImmOp:$lane),
                     sub_64))>;

  def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
                  (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
            (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
}

// Match all LD1LN instructions
defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
                      extloadi8, LD1LN_B>;

defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
                      extloadi16, LD1LN_H>;

defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
                      load, LD1LN_S>;
defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
                      load, LD1LN_S>;

defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
                      load, LD1LN_D>;
defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
                      load, LD1LN_D>;

// Store one structure from a single lane; no register outputs, so no tie
// is needed.
// NOTE(review): hasExtraDefRegAllocReq on an instruction with no defs looks
// like it was copied from the load variant; hasExtraSrcRegAllocReq would be
// the matching flag for a store — confirm before changing the encoding-
// neutral flag.
class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                     Operand ImmOp, string asmop>
  : NeonI_LdStOne_Lane<0, r, op2_1, op0,
                       (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
                       asmop # "\t$Rt[$lane], [$Rn]",
                       [],
                       NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
  let hasExtraDefRegAllocReq = 1;
}

// Same lane-number bit scattering as LDN_Lane_BHSD: top bit in Inst{30},
// rest in Inst{12-10}.
multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
  def _B : NeonI_STN_Lane<r, 0b00, op0,
                          !cast<RegisterOperand>(List # "B_operand"),
                          neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H : NeonI_STN_Lane<r, 0b01, op0,
                          !cast<RegisterOperand>(List # "H_operand"),
                          neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "S_operand"),
                          neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _D : NeonI_STN_Lane<r, 0b10, op0,
                          !cast<RegisterOperand>(List # "D_operand"),
                          neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}

// Store single 1-element structure from one lane of 1 register.
defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;

// Store single N-element structure from one lane of N consecutive registers
// (N = 2,3,4)
defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;

// Select ST1LN for storing a single extracted vector lane; VPR64 sources
// are widened to the full 128-bit register first (cf. LD1LN_patterns).
multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
                          Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
                          Instruction INST> {
  def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
                     GPR64xsp:$Rn),
            (INST GPR64xsp:$Rn,
                  (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
                  ImmOp:$lane)>;

  def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
                     GPR64xsp:$Rn),
            (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
}

// Match all ST1LN instructions
defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
                      truncstorei8, ST1LN_B>;

defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
                      truncstorei16, ST1LN_H>;

defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;
defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
                      store, ST1LN_S>;

defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;
defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
                      store, ST1LN_D>;

// End of vector load/store single N-element structure (class SIMD lsone).


// The following are post-index load/store single N-element instructions
// (class SIMD lsone-post)

// Post-index load-and-replicate: "_fixed" (immediate amount, Rm = 0b11111)
// and "_register" (amount in Rm) forms, both writing the updated base back
// through the $wb/$Rn tie.
multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                            RegisterOperand VecList, Operand ImmTy,
                            string asmop> {
  let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
      DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
    def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                                      (outs VecList:$Rt, GPR64xsp:$wb),
                                      (ins GPR64xsp:$Rn, ImmTy:$amt),
                                      asmop # "\t$Rt, [$Rn], $amt",
                                      [],
                                      NoItinerary> {
      let Rm = 0b11111;
    }

    def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                                         (outs VecList:$Rt, GPR64xsp:$wb),
                                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                                         asmop # "\t$Rt, [$Rn], $Rm",
                                         [],
                                         NoItinerary>;
  }
}

// Writeback load-and-replicate for every arrangement.  The fixed post-index
// amount differs per element size, hence the four uimm_* operand parameters.
multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
                         Operand uimm_b, Operand uimm_h,
                         Operand uimm_s, Operand uimm_d> {
  defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
                              !cast<RegisterOperand>(List # "8B_operand"),
                              uimm_b, asmop>;

  defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "4H_operand"),
                              uimm_h, asmop>;

  defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "2S_operand"),
                              uimm_s, asmop>;

  defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "1D_operand"),
                              uimm_d, asmop>;

  defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
                               !cast<RegisterOperand>(List # "16B_operand"),
                               uimm_b, asmop>;

  defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
                              !cast<RegisterOperand>(List # "8H_operand"),
                              uimm_h, asmop>;

  defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
                              !cast<RegisterOperand>(List # "4S_operand"),
                              uimm_s, asmop>;

  defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
                              !cast<RegisterOperand>(List # "2D_operand"),
                              uimm_d, asmop>;
}

// Post-index load single 1-element structure to all lanes of 1 register
defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
                             uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
                             uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
                             uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
                             uimm_exact8, uimm_exact16, uimm_exact32>;

// Post-index per-lane loads: fixed-increment (Rm = 0b11111) and
// register-increment flavours.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
    Constraints = "$Rn = $wb, $Rt = $src",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                              (outs VList:$Rt, GPR64xsp:$wb),
                              (ins GPR64xsp:$Rn, ImmTy:$amt,
                                   VList:$src, ImmOp:$lane),
                              asmop # "\t$Rt[$lane], [$Rn], $amt",
                              [],
                              NoItinerary> {
    let Rm = 0b11111;
  }

  class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
                              (outs VList:$Rt, GPR64xsp:$wb),
                              (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
                                   VList:$src, ImmOp:$lane),
                              asmop # "\t$Rt[$lane], [$Rn], $Rm",
                              [],
                              NoItinerary>;
}

// Same lane-number bit scattering as LDN_Lane_BHSD, for both writeback
// flavours of each element size.
multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}

// Post-index load single 1-element structure to one lane of 1 register.
defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to one lane of N consecutive
// registers
// (N = 2,3,4)
defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;

// Post-index per-lane stores, mirroring the load classes above.
let mayStore = 1, neverHasSideEffects = 1,
    hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                              (outs GPR64xsp:$wb),
                              (ins GPR64xsp:$Rn, ImmTy:$amt,
                                   VList:$Rt, ImmOp:$lane),
                              asmop # "\t$Rt[$lane], [$Rn], $amt",
                              [],
                              NoItinerary> {
    let Rm = 0b11111;
  }

  class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
    : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                              (outs GPR64xsp:$wb),
                              (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
                                   ImmOp:$lane),
                              asmop # "\t$Rt[$lane], [$Rn], $Rm",
                              [],
                              NoItinerary>;
}

// Writeback store-lane instantiations; same lane encoding as the loads.
multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : STN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : STN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}

// Post-index store single 1-element structure from one lane of 1 register.
defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4)
defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;

// End of post-index load/store single N-element instructions
// (class SIMD lsone-post)

// Neon Scalar instructions implementation
// Scalar Three Same

// Generic scalar three-same instruction: $Rd = op($Rn, $Rm) with all three
// operands in the same scalar FP/SIMD register class.
class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRC>
  : NeonI_Scalar3Same<u, size, opcode,
                      (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// D-register-only variant (size field fixed to 0b11).
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;

// Instantiations for H and S element sizes only.
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
                                      bit Commutable = 0> {
  let isCommutable = Commutable in {
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
  }
}

// S/D instantiations: the low size bit selects single vs. double precision;
// size_high is supplied by the caller.
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
                                      string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
  }
}

// Instantiations for all four element sizes (B/H/S/D).
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                        string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
  }
}

// Selection patterns mapping scalar (v1iN / v1fN) DAG operations onto the
// instructions defined above.
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTB,
                                               Instruction INSTH,
                                               Instruction INSTS,
                                               Instruction INSTD>
  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
           (INSTB FPR8:$Rn, FPR8:$Rm)>;

  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
           (INSTH FPR16:$Rn, FPR16:$Rm)>;

  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
           (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
                                           Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;

multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTS,
                                             Instruction INSTD> {
  def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// FP comparisons: the result is an integer mask of the same width as the
// floating-point inputs.
multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
                                                 Instruction INSTS,
                                                 Instruction INSTD> {
  def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Match the generic Neon_cmp node with a specific condition code onto one
// D-sized comparison instruction.
class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
                                              Instruction INSTD>
  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;

// Scalar Three Different

// Scalar three-different: the destination register class (FPRCD) is wider
// than the source class (FPRCS) — the long/wide instruction forms.
class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar3Diff<u, size, opcode,
                      (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
  def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
  def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
}

// Multiply-accumulate style variants: the destination is also an input,
// expressed through the $Src = $Rd tie.
multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
  let Constraints = "$Src = $Rd" in {
    def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
                                (outs FPR32:$Rd),
                                (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                                !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                                [],
                                NoItinerary>;
    def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                                (outs FPR64:$Rd),
                                (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                                !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                                [],
                                NoItinerary>;
  }
}

multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

multiclass
Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode, 4221 Instruction INSTH, 4222 Instruction INSTS> { 4223 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), 4224 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>; 4225 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), 4226 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>; 4227} 4228 4229// Scalar Two Registers Miscellaneous 4230 4231class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop, 4232 RegisterClass FPRCD, RegisterClass FPRCS> 4233 : NeonI_Scalar2SameMisc<u, size, opcode, 4234 (outs FPRCD:$Rd), (ins FPRCS:$Rn), 4235 !strconcat(asmop, "\t$Rd, $Rn"), 4236 [], 4237 NoItinerary>; 4238 4239multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode, 4240 string asmop> { 4241 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32, 4242 FPR32>; 4243 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64, 4244 FPR64>; 4245} 4246 4247multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> { 4248 def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>; 4249} 4250 4251multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop> 4252 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> { 4253 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>; 4254 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>; 4255 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>; 4256} 4257 4258class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop> 4259 : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>; 4260 4261multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode, 4262 string asmop> { 4263 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>; 4264 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, 
opcode, asmop, FPR16, FPR32>; 4265 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>; 4266} 4267 4268class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode, 4269 string asmop, RegisterClass FPRC> 4270 : NeonI_Scalar2SameMisc<u, size, opcode, 4271 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn), 4272 !strconcat(asmop, "\t$Rd, $Rn"), 4273 [], 4274 NoItinerary>; 4275 4276multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode, 4277 string asmop> { 4278 4279 let Constraints = "$Src = $Rd" in { 4280 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>; 4281 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>; 4282 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>; 4283 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>; 4284 } 4285} 4286 4287class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode, 4288 Instruction INSTD> 4289 : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))), 4290 (INSTD FPR64:$Rn)>; 4291 4292multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode, 4293 Instruction INSTS, 4294 Instruction INSTD> { 4295 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))), 4296 (INSTS FPR32:$Rn)>; 4297 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), 4298 (INSTD FPR64:$Rn)>; 4299} 4300 4301multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode, 4302 SDPatternOperator Dopnode, 4303 Instruction INSTS, 4304 Instruction INSTD> { 4305 def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))), 4306 (INSTS FPR32:$Rn)>; 4307 def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))), 4308 (INSTD FPR64:$Rn)>; 4309} 4310 4311multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode, 4312 Instruction INSTS, 4313 Instruction INSTD> { 4314 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))), 4315 (INSTS FPR32:$Rn)>; 4316 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), 4317 (INSTD FPR64:$Rn)>; 4318} 4319 4320class 
NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop> 4321 : NeonI_Scalar2SameMisc<u, 0b11, opcode, 4322 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm), 4323 !strconcat(asmop, "\t$Rd, $Rn, $Imm"), 4324 [], 4325 NoItinerary>; 4326 4327multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode, 4328 string asmop> { 4329 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode, 4330 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm), 4331 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), 4332 [], 4333 NoItinerary>; 4334 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode, 4335 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm), 4336 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), 4337 [], 4338 NoItinerary>; 4339} 4340 4341class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode, 4342 Instruction INSTD> 4343 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), 4344 (v1i64 (bitconvert (v8i8 Neon_AllZero))))), 4345 (INSTD FPR64:$Rn, 0)>; 4346 4347class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC, 4348 Instruction INSTD> 4349 : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), 4350 (i32 neon_uimm0:$Imm), CC)), 4351 (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; 4352 4353multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode, 4354 Instruction INSTS, 4355 Instruction INSTD> { 4356 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), 4357 (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))), 4358 (INSTS FPR32:$Rn, fpz32:$FPImm)>; 4359 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), 4360 (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))), 4361 (INSTD FPR64:$Rn, fpz32:$FPImm)>; 4362} 4363 4364multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode, 4365 Instruction INSTD> { 4366 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))), 4367 (INSTD FPR64:$Rn)>; 4368} 4369 4370multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode, 4371 Instruction INSTB, 4372 Instruction INSTH, 4373 Instruction INSTS, 4374 Instruction INSTD> 4375 : 
Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> { 4376 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))), 4377 (INSTB FPR8:$Rn)>; 4378 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))), 4379 (INSTH FPR16:$Rn)>; 4380 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))), 4381 (INSTS FPR32:$Rn)>; 4382} 4383 4384multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< 4385 SDPatternOperator opnode, 4386 Instruction INSTH, 4387 Instruction INSTS, 4388 Instruction INSTD> { 4389 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), 4390 (INSTH FPR16:$Rn)>; 4391 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), 4392 (INSTS FPR32:$Rn)>; 4393 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), 4394 (INSTD FPR64:$Rn)>; 4395 4396} 4397 4398multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< 4399 SDPatternOperator opnode, 4400 Instruction INSTB, 4401 Instruction INSTH, 4402 Instruction INSTS, 4403 Instruction INSTD> { 4404 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), 4405 (INSTB FPR8:$Src, FPR8:$Rn)>; 4406 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), 4407 (INSTH FPR16:$Src, FPR16:$Rn)>; 4408 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), 4409 (INSTS FPR32:$Src, FPR32:$Rn)>; 4410 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), 4411 (INSTD FPR64:$Src, FPR64:$Rn)>; 4412} 4413 4414// Scalar Shift By Immediate 4415 4416class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop, 4417 RegisterClass FPRC, Operand ImmTy> 4418 : NeonI_ScalarShiftImm<u, opcode, 4419 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm), 4420 !strconcat(asmop, "\t$Rd, $Rn, $Imm"), 4421 [], NoItinerary>; 4422 4423multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode, 4424 string asmop> { 4425 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> { 4426 bits<6> Imm; 4427 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 4428 let Inst{21-16} = Imm; 4429 } 4430} 4431 4432multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode, 4433 
string asmop> 4434 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> { 4435 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> { 4436 bits<3> Imm; 4437 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx 4438 let Inst{18-16} = Imm; 4439 } 4440 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> { 4441 bits<4> Imm; 4442 let Inst{22-20} = 0b001; // immh:immb = 001xxxx 4443 let Inst{19-16} = Imm; 4444 } 4445 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> { 4446 bits<5> Imm; 4447 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 4448 let Inst{20-16} = Imm; 4449 } 4450} 4451 4452multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode, 4453 string asmop> { 4454 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> { 4455 bits<6> Imm; 4456 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 4457 let Inst{21-16} = Imm; 4458 } 4459} 4460 4461multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode, 4462 string asmop> 4463 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> { 4464 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> { 4465 bits<3> Imm; 4466 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx 4467 let Inst{18-16} = Imm; 4468 } 4469 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> { 4470 bits<4> Imm; 4471 let Inst{22-20} = 0b001; // immh:immb = 001xxxx 4472 let Inst{19-16} = Imm; 4473 } 4474 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> { 4475 bits<5> Imm; 4476 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 4477 let Inst{20-16} = Imm; 4478 } 4479} 4480 4481class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop> 4482 : NeonI_ScalarShiftImm<u, opcode, 4483 (outs FPR64:$Rd), 4484 (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm), 4485 !strconcat(asmop, "\t$Rd, $Rn, $Imm"), 4486 [], NoItinerary> { 4487 bits<6> Imm; 4488 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 4489 let Inst{21-16} = Imm; 4490 
let Constraints = "$Src = $Rd"; 4491} 4492 4493class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop> 4494 : NeonI_ScalarShiftImm<u, opcode, 4495 (outs FPR64:$Rd), 4496 (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm), 4497 !strconcat(asmop, "\t$Rd, $Rn, $Imm"), 4498 [], NoItinerary> { 4499 bits<6> Imm; 4500 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 4501 let Inst{21-16} = Imm; 4502 let Constraints = "$Src = $Rd"; 4503} 4504 4505class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop, 4506 RegisterClass FPRCD, RegisterClass FPRCS, 4507 Operand ImmTy> 4508 : NeonI_ScalarShiftImm<u, opcode, 4509 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm), 4510 !strconcat(asmop, "\t$Rd, $Rn, $Imm"), 4511 [], NoItinerary>; 4512 4513multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode, 4514 string asmop> { 4515 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16, 4516 shr_imm8> { 4517 bits<3> Imm; 4518 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx 4519 let Inst{18-16} = Imm; 4520 } 4521 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32, 4522 shr_imm16> { 4523 bits<4> Imm; 4524 let Inst{22-20} = 0b001; // immh:immb = 001xxxx 4525 let Inst{19-16} = Imm; 4526 } 4527 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64, 4528 shr_imm32> { 4529 bits<5> Imm; 4530 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 4531 let Inst{20-16} = Imm; 4532 } 4533} 4534 4535multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> { 4536 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> { 4537 bits<5> Imm; 4538 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx 4539 let Inst{20-16} = Imm; 4540 } 4541 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> { 4542 bits<6> Imm; 4543 let Inst{22} = 0b1; // immh:immb = 1xxxxxx 4544 let Inst{21-16} = Imm; 4545 } 4546} 4547 4548multiclass 
Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode, 4549 Instruction INSTD> { 4550 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), 4551 (INSTD FPR64:$Rn, imm:$Imm)>; 4552} 4553 4554multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode, 4555 Instruction INSTD> { 4556 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))), 4557 (INSTD FPR64:$Rn, imm:$Imm)>; 4558} 4559 4560class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode, 4561 Instruction INSTD> 4562 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), 4563 (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), 4564 (INSTD FPR64:$Rn, imm:$Imm)>; 4565 4566multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode, 4567 Instruction INSTB, 4568 Instruction INSTH, 4569 Instruction INSTS, 4570 Instruction INSTD> 4571 : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> { 4572 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))), 4573 (INSTB FPR8:$Rn, imm:$Imm)>; 4574 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))), 4575 (INSTH FPR16:$Rn, imm:$Imm)>; 4576 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))), 4577 (INSTS FPR32:$Rn, imm:$Imm)>; 4578} 4579 4580class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode, 4581 Instruction INSTD> 4582 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), 4583 (i32 shl_imm64:$Imm))), 4584 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; 4585 4586class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode, 4587 Instruction INSTD> 4588 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), 4589 (i32 shr_imm64:$Imm))), 4590 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; 4591 4592multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< 4593 SDPatternOperator opnode, 4594 Instruction INSTH, 4595 Instruction INSTS, 4596 Instruction INSTD> { 4597 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))), 4598 
(INSTH FPR16:$Rn, imm:$Imm)>; 4599 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), 4600 (INSTS FPR32:$Rn, imm:$Imm)>; 4601 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), 4602 (INSTD FPR64:$Rn, imm:$Imm)>; 4603} 4604 4605multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode, 4606 SDPatternOperator Dopnode, 4607 Instruction INSTS, 4608 Instruction INSTD> { 4609 def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), 4610 (INSTS FPR32:$Rn, imm:$Imm)>; 4611 def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), 4612 (INSTD FPR64:$Rn, imm:$Imm)>; 4613} 4614 4615multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode, 4616 SDPatternOperator Dopnode, 4617 Instruction INSTS, 4618 Instruction INSTD> { 4619 def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 shr_imm32:$Imm))), 4620 (INSTS FPR32:$Rn, imm:$Imm)>; 4621 def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), 4622 (INSTD FPR64:$Rn, imm:$Imm)>; 4623} 4624 4625// Scalar Signed Shift Right (Immediate) 4626defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">; 4627defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>; 4628// Pattern to match llvm.arm.* intrinsic. 4629def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>; 4630 4631// Scalar Unsigned Shift Right (Immediate) 4632defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">; 4633defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>; 4634// Pattern to match llvm.arm.* intrinsic. 
def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;

// Scalar Signed Rounding Shift Right (Immediate)
defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;

// Scalar Unsigned Rounding Shift Right (Immediate)
defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;

// Scalar Signed Shift Right and Accumulate (Immediate)
def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
        <int_aarch64_neon_vsrads_n, SSRA>;

// Scalar Unsigned Shift Right and Accumulate (Immediate)
def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
        <int_aarch64_neon_vsradu_n, USRA>;

// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
        <int_aarch64_neon_vrsrads_n, SRSRA>;

// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
        <int_aarch64_neon_vrsradu_n, URSRA>;

// Scalar Shift Left (Immediate)
defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
// Pattern to match llvm.arm.* intrinsic.
def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;

// Signed Saturating Shift Left (Immediate)
defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
                                               SQSHLbbi, SQSHLhhi,
                                               SQSHLssi, SQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;

// Unsigned Saturating Shift Left (Immediate)
defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
                                               UQSHLbbi, UQSHLhhi,
                                               UQSHLssi, UQSHLddi>;
// Pattern to match llvm.arm.* intrinsic.
defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;

// Signed Saturating Shift Left Unsigned (Immediate)
defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
                                               SQSHLUbbi, SQSHLUhhi,
                                               SQSHLUssi, SQSHLUddi>;

// Shift Right And Insert (Immediate)
def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
def : Neon_ScalarShiftRImm_accum_D_size_patterns
        <int_aarch64_neon_vsri, SRI>;

// Shift Left And Insert (Immediate)
def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
def : Neon_ScalarShiftLImm_accum_D_size_patterns
        <int_aarch64_neon_vsli, SLI>;

// Signed Saturating Shift Right Narrow (Immediate)
defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
                                                    SQSHRNbhi, SQSHRNhsi,
                                                    SQSHRNsdi>;

// Unsigned Saturating Shift Right Narrow (Immediate)
defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
                                                    UQSHRNbhi, UQSHRNhsi,
                                                    UQSHRNsdi>;

// Signed Saturating Rounded Shift Right Narrow (Immediate)
defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
                                                    SQRSHRNbhi, SQRSHRNhsi,
                                                    SQRSHRNsdi>;

// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
                                                    UQRSHRNbhi, UQRSHRNhsi,
                                                    UQRSHRNsdi>;

// Signed Saturating Shift Right Unsigned Narrow (Immediate)
defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
                                                    SQSHRUNbhi, SQSHRUNhsi,
                                                    SQSHRUNsdi>;

// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
                                                    SQRSHRUNbhi, SQRSHRUNhsi,
                                                    SQRSHRUNsdi>;

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
                                                  int_aarch64_neon_vcvtf64_n_s64,
                                                  SCVTF_Nssi, SCVTF_Nddi>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
                                                  int_aarch64_neon_vcvtf64_n_u64,
                                                  UCVTF_Nssi, UCVTF_Nddi>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
                                                  int_aarch64_neon_vcvtd_n_s64_f64,
                                                  FCVTZS_Nssi, FCVTZS_Nddi>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
                                                  int_aarch64_neon_vcvtd_n_u64_f64,
                                                  FCVTZU_Nssi, FCVTZU_Nddi>;

// Patterns For Convert Instructions Between v1f64 and v1i64
class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
        (INST FPR64:$Rn, imm:$Imm)>;

class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                             Instruction INST>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
        (INST FPR64:$Rn, imm:$Imm)>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
                                             SCVTF_Nddi>;

def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
                                             UCVTF_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
                                             FCVTZS_Nddi>;

def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
                                             FCVTZU_Nddi>;

// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
}

// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;

// Pattern for Scalar Integer Add and Sub with D register only
defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;

// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;

// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;

// Scalar Integer Saturating Sub (Signed, Unsigned)
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;


// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
                                           SQADDhhh, SQADDsss, SQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
                                           UQADDhhh, UQADDsss, UQADDddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
                                           SQSUBhhh, SQSUBsss, SQSUBddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                           UQSUBhhh, UQSUBsss, UQSUBddd>;

// Scalar Integer Saturating Doubling Multiply Half High
defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;

// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Doubling Multiply Half High and
// Scalar Integer Saturating Rounding Doubling Multiply Half High
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
                                         SQDMULHsss>;
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
                                         SQRDMULHsss>;

// Scalar Floating-point Multiply Extended
defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;

// Scalar Floating-point Reciprocal Step
defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;

// Scalar Floating-point Reciprocal Square Root Step
defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Floating-point Reciprocal Step and
// Scalar Floating-point Reciprocal Square Root Step
defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
                                         FRECPSddd>;
defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
                                         FRSQRTSddd>;

def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Floating-point Multiply Extended
multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTS,
                                                  Instruction INSTD> {
  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
                                              FMULXsss, FMULXddd>;

// Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
                                           SQSHLhhh, SQSHLsss, SQSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
                                           UQSHLhhh, UQSHLsss, UQSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;

// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;

// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
                                           SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
                                           UQRSHLhhh, UQRSHLsss, UQRSHLddd>;

// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;

// Signed Saturating Doubling Multiply-Add Long
defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
                                            SQDMLALshh, SQDMLALdss>;

// Signed Saturating Doubling Multiply-Subtract Long
defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
                                            SQDMLSLshh, SQDMLSLdss>;

// Signed Saturating Doubling Multiply Long
defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                         SQDMULLshh, SQDMULLdss>;

// Scalar Signed Integer Convert To Floating-point
defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
                                                 int_aarch64_neon_vcvtf64_s64,
                                                 SCVTFss, SCVTFdd>;

// Scalar Unsigned Integer Convert To Floating-point
defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
                                                 int_aarch64_neon_vcvtf64_u64,
                                                 UCVTFss, UCVTFdd>;

// Scalar Floating-point Converts
def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
                                                  FCVTXN>;
// Scalar Floating-point Convert To Integer, one defm pair per rounding mode:
// N = to nearest (ties to even), M = toward minus infinity, A = to nearest
// (ties away), P = toward plus infinity, Z = toward zero; S/U suffix selects
// signed/unsigned result.  Each instruction defm is followed by the pattern
// defm hooking up the matching llvm.aarch64.neon.fcvt* intrinsic.
defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
                                                  FCVTNSss, FCVTNSdd>;

defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
                                                  FCVTNUss, FCVTNUdd>;

defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
                                                  FCVTMSss, FCVTMSdd>;

defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
                                                  FCVTMUss, FCVTMUdd>;

defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
                                                  FCVTASss, FCVTASdd>;

defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
                                                  FCVTAUss, FCVTAUdd>;

defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
                                                  FCVTPSss, FCVTPSdd>;

defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
                                                  FCVTPUss, FCVTPUdd>;

defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
                                                  FCVTZSss, FCVTZSdd>;

defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
                                                  FCVTZUss, FCVTZUdd>;

// Patterns For Convert Instructions Between v1f64 and v1i64

// v1i64 -> v1f64 (int-to-float) on a single D register.
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;

// v1f64 -> v1i64 (float-to-int) on a single D register.
class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
                                              Instruction INST>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;

def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;

// Scalar Floating-point Reciprocal Estimate
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
                                             FRECPEss, FRECPEdd>;

// Scalar Floating-point Reciprocal Exponent
defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
                                             FRECPXss, FRECPXdd>;

// Scalar Floating-point Reciprocal Square Root Estimate
defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
                                             FRSQRTEss, FRSQRTEdd>;

// Scalar Floating-point Round: map the generic rounding DAG nodes onto the
// v1f64 FRINT* D-register instructions (defined elsewhere in this file).
class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;

// Scalar Integer Compare

5047// Scalar Compare Bitwise Equal 5048def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">; 5049def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>; 5050 5051class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode, 5052 Instruction INSTD, 5053 CondCode CC> 5054 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)), 5055 (INSTD FPR64:$Rn, FPR64:$Rm)>; 5056 5057def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>; 5058 5059// Scalar Compare Signed Greather Than Or Equal 5060def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">; 5061def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>; 5062def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>; 5063 5064// Scalar Compare Unsigned Higher Or Same 5065def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">; 5066def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>; 5067def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>; 5068 5069// Scalar Compare Unsigned Higher 5070def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">; 5071def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>; 5072def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>; 5073 5074// Scalar Compare Signed Greater Than 5075def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">; 5076def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>; 5077def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>; 5078 5079// Scalar Compare Bitwise Test Bits 5080def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">; 5081def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>; 5082def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>; 5083 5084// Scalar Compare Bitwise Equal To Zero 5085def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">; 5086def : 
Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq, 5087 CMEQddi>; 5088def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>; 5089 5090// Scalar Compare Signed Greather Than Or Equal To Zero 5091def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">; 5092def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge, 5093 CMGEddi>; 5094def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>; 5095 5096// Scalar Compare Signed Greater Than Zero 5097def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">; 5098def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt, 5099 CMGTddi>; 5100def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>; 5101 5102// Scalar Compare Signed Less Than Or Equal To Zero 5103def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">; 5104def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez, 5105 CMLEddi>; 5106def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>; 5107 5108// Scalar Compare Less Than Zero 5109def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">; 5110def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz, 5111 CMLTddi>; 5112def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>; 5113 5114// Scalar Floating-point Compare 5115 5116// Scalar Floating-point Compare Mask Equal 5117defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; 5118defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq, 5119 FCMEQsss, FCMEQddd>; 5120def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>; 5121 5122// Scalar Floating-point Compare Mask Equal To Zero 5123defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; 5124defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq, 5125 FCMEQZssi, FCMEQZddi>; 5126def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)), 5127 (FCMEQZddi FPR64:$Rn, 
fpz32:$FPImm)>; 5128 5129// Scalar Floating-point Compare Mask Greater Than Or Equal 5130defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; 5131defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge, 5132 FCMGEsss, FCMGEddd>; 5133def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>; 5134 5135// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero 5136defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">; 5137defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge, 5138 FCMGEZssi, FCMGEZddi>; 5139 5140// Scalar Floating-point Compare Mask Greather Than 5141defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">; 5142defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt, 5143 FCMGTsss, FCMGTddd>; 5144def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>; 5145 5146// Scalar Floating-point Compare Mask Greather Than Zero 5147defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">; 5148defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt, 5149 FCMGTZssi, FCMGTZddi>; 5150 5151// Scalar Floating-point Compare Mask Less Than Or Equal To Zero 5152defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">; 5153defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez, 5154 FCMLEZssi, FCMLEZddi>; 5155 5156// Scalar Floating-point Compare Mask Less Than Zero 5157defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">; 5158defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz, 5159 FCMLTZssi, FCMLTZddi>; 5160 5161// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal 5162defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">; 5163defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage, 5164 FACGEsss, FACGEddd>; 5165 5166// Scalar Floating-point Absolute Compare Mask Greater Than 5167defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 
0b11101, "facgt">; 5168defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt, 5169 FACGTsss, FACGTddd>; 5170 5171// Scakar Floating-point Absolute Difference 5172defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">; 5173defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, 5174 FABDsss, FABDddd>; 5175 5176// Scalar Absolute Value 5177defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; 5178defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>; 5179 5180// Scalar Signed Saturating Absolute Value 5181defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">; 5182defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs, 5183 SQABSbb, SQABShh, SQABSss, SQABSdd>; 5184 5185// Scalar Negate 5186defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">; 5187defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>; 5188 5189// Scalar Signed Saturating Negate 5190defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">; 5191defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg, 5192 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>; 5193 5194// Scalar Signed Saturating Accumulated of Unsigned Value 5195defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">; 5196defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd, 5197 SUQADDbb, SUQADDhh, 5198 SUQADDss, SUQADDdd>; 5199 5200// Scalar Unsigned Saturating Accumulated of Signed Value 5201defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">; 5202defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd, 5203 USQADDbb, USQADDhh, 5204 USQADDss, USQADDdd>; 5205 5206def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src), 5207 (v1i64 FPR64:$Rn))), 5208 (SUQADDdd FPR64:$Src, FPR64:$Rn)>; 5209 5210def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src), 5211 (v1i64 FPR64:$Rn))), 5212 (USQADDdd FPR64:$Src, FPR64:$Rn)>; 5213 
5214def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))), 5215 (ABSdd FPR64:$Rn)>; 5216 5217def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))), 5218 (SQABSdd FPR64:$Rn)>; 5219 5220def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))), 5221 (SQNEGdd FPR64:$Rn)>; 5222 5223def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))), 5224 (v1i64 FPR64:$Rn))), 5225 (NEGdd FPR64:$Rn)>; 5226 5227// Scalar Signed Saturating Extract Unsigned Narrow 5228defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; 5229defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu, 5230 SQXTUNbh, SQXTUNhs, 5231 SQXTUNsd>; 5232 5233// Scalar Signed Saturating Extract Narrow 5234defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">; 5235defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns, 5236 SQXTNbh, SQXTNhs, 5237 SQXTNsd>; 5238 5239// Scalar Unsigned Saturating Extract Narrow 5240defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">; 5241defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu, 5242 UQXTNbh, UQXTNhs, 5243 UQXTNsd>; 5244 5245// Scalar Reduce Pairwise 5246 5247multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode, 5248 string asmop, bit Commutable = 0> { 5249 let isCommutable = Commutable in { 5250 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode, 5251 (outs FPR64:$Rd), (ins VPR128:$Rn), 5252 !strconcat(asmop, "\t$Rd, $Rn.2d"), 5253 [], 5254 NoItinerary>; 5255 } 5256} 5257 5258multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode, 5259 string asmop, bit Commutable = 0> 5260 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> { 5261 let isCommutable = Commutable in { 5262 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode, 5263 (outs FPR32:$Rd), (ins VPR64:$Rn), 5264 !strconcat(asmop, "\t$Rd, $Rn.2s"), 5265 [], 5266 NoItinerary>; 5267 } 5268} 5269 5270// Scalar Reduce Addition Pairwise (Integer) 
with 5271// Pattern to match llvm.arm.* intrinsic 5272defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; 5273 5274// Pattern to match llvm.aarch64.* intrinsic for 5275// Scalar Reduce Addition Pairwise (Integer) 5276def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), 5277 (ADDPvv_D_2D VPR128:$Rn)>; 5278def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))), 5279 (ADDPvv_D_2D VPR128:$Rn)>; 5280 5281// Scalar Reduce Addition Pairwise (Floating Point) 5282defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; 5283 5284// Scalar Reduce Maximum Pairwise (Floating Point) 5285defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; 5286 5287// Scalar Reduce Minimum Pairwise (Floating Point) 5288defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; 5289 5290// Scalar Reduce maxNum Pairwise (Floating Point) 5291defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; 5292 5293// Scalar Reduce minNum Pairwise (Floating Point) 5294defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; 5295 5296multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS, 5297 SDPatternOperator opnodeD, 5298 Instruction INSTS, 5299 Instruction INSTD> { 5300 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), 5301 (INSTS VPR64:$Rn)>; 5302 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), 5303 (INSTD VPR128:$Rn)>; 5304} 5305 5306// Patterns to match llvm.aarch64.* intrinsic for 5307// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) 5308defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd, 5309 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>; 5310 5311defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax, 5312 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>; 5313 5314defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin, 5315 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>; 5316 5317defm : 
Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, 5318 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; 5319 5320defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, 5321 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; 5322 5323defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv, 5324 int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>; 5325 5326def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))), 5327 (FADDPvv_S_2S (v2f32 5328 (EXTRACT_SUBREG 5329 (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))), 5330 sub_64)))>; 5331 5332defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv, 5333 int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>; 5334 5335defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv, 5336 int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>; 5337 5338defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv, 5339 int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; 5340 5341defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv, 5342 int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; 5343 5344// Scalar by element Arithmetic 5345 5346class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode, 5347 string rmlane, bit u, bit szhi, bit szlo, 5348 RegisterClass ResFPR, RegisterClass OpFPR, 5349 RegisterOperand OpVPR, Operand OpImm> 5350 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, 5351 (outs ResFPR:$Rd), 5352 (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), 5353 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", 5354 [], 5355 NoItinerary> { 5356 bits<3> Imm; 5357 bits<5> MRm; 5358} 5359 5360class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode, 5361 string rmlane, 5362 bit u, bit szhi, bit szlo, 5363 RegisterClass ResFPR, 5364 RegisterClass OpFPR, 5365 RegisterOperand OpVPR, 5366 Operand OpImm> 5367 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, 5368 (outs ResFPR:$Rd), 5369 (ins ResFPR:$src, OpFPR:$Rn, 
OpVPR:$MRm, OpImm:$Imm), 5370 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", 5371 [], 5372 NoItinerary> { 5373 let Constraints = "$src = $Rd"; 5374 bits<3> Imm; 5375 bits<5> MRm; 5376} 5377 5378// Scalar Floating Point multiply (scalar, by element) 5379def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", 5380 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { 5381 let Inst{11} = Imm{1}; // h 5382 let Inst{21} = Imm{0}; // l 5383 let Inst{20-16} = MRm; 5384} 5385def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", 5386 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { 5387 let Inst{11} = Imm{0}; // h 5388 let Inst{21} = 0b0; // l 5389 let Inst{20-16} = MRm; 5390} 5391 5392// Scalar Floating Point multiply extended (scalar, by element) 5393def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", 5394 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { 5395 let Inst{11} = Imm{1}; // h 5396 let Inst{21} = Imm{0}; // l 5397 let Inst{20-16} = MRm; 5398} 5399def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", 5400 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { 5401 let Inst{11} = Imm{0}; // h 5402 let Inst{21} = 0b0; // l 5403 let Inst{20-16} = MRm; 5404} 5405 5406multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns< 5407 SDPatternOperator opnode, 5408 Instruction INST, 5409 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, 5410 ValueType OpNTy, ValueType ExTy, Operand OpNImm> { 5411 5412 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), 5413 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))), 5414 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; 5415 5416 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), 5417 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))), 5418 (ResTy (INST (ResTy FPRC:$Rn), 5419 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), 5420 OpNImm:$Imm))>; 5421 5422 // swapped operands 5423 def : Pat<(ResTy (opnode 5424 (ResTy 
(vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), 5425 (ResTy FPRC:$Rn))), 5426 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; 5427 5428 def : Pat<(ResTy (opnode 5429 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), 5430 (ResTy FPRC:$Rn))), 5431 (ResTy (INST (ResTy FPRC:$Rn), 5432 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), 5433 OpNImm:$Imm))>; 5434} 5435 5436// Patterns for Scalar Floating Point multiply (scalar, by element) 5437defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S, 5438 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>; 5439defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D, 5440 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>; 5441 5442// Patterns for Scalar Floating Point multiply extended (scalar, by element) 5443defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx, 5444 FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare, 5445 v2f32, v4f32, neon_uimm1_bare>; 5446defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx, 5447 FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare, 5448 v1f64, v2f64, neon_uimm0_bare>; 5449 5450 5451// Scalar Floating Point fused multiply-add (scalar, by element) 5452def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", 5453 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { 5454 let Inst{11} = Imm{1}; // h 5455 let Inst{21} = Imm{0}; // l 5456 let Inst{20-16} = MRm; 5457} 5458def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", 5459 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { 5460 let Inst{11} = Imm{0}; // h 5461 let Inst{21} = 0b0; // l 5462 let Inst{20-16} = MRm; 5463} 5464 5465// Scalar Floating Point fused multiply-subtract (scalar, by element) 5466def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", 5467 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { 5468 let Inst{11} 
= Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0;    // l
  let Inst{20-16} = MRm;
}

// We are allowed to match the fma instruction regardless of compile options.
// Four fmla and four fmls patterns: 128-bit and 64-bit (SUBREG_TO_REG
// widened) vector operands, each with both operand orders of the
// commutative multiply.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  // fmla
  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmla operands
  def : Pat<(ResTy (fma
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // fmls: the negated multiplicand folds into the multiply-subtract form.
  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmls operands
  def : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}

// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
// NOTE(review): the original listed the FMLAddv_2D/FMLSddv_2D instantiation
// twice with identical arguments; the redundant second copy is removed here.
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Scalar Signed saturating doubling multiply long (scalar, by element)
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def
SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", 5573 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { 5574 let Inst{11} = 0b0; // h 5575 let Inst{21} = Imm{0}; // l 5576 let Inst{20-16} = MRm; 5577} 5578def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull", 5579 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { 5580 let Inst{11} = Imm{1}; // h 5581 let Inst{21} = Imm{0}; // l 5582 let Inst{20-16} = MRm; 5583} 5584 5585multiclass Neon_ScalarXIndexedElem_MUL_Patterns< 5586 SDPatternOperator opnode, 5587 Instruction INST, 5588 ValueType ResTy, RegisterClass FPRC, 5589 ValueType OpVTy, ValueType OpTy, 5590 ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { 5591 5592 def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), 5593 (OpVTy (scalar_to_vector 5594 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))), 5595 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; 5596 5597 //swapped operands 5598 def : Pat<(ResTy (opnode 5599 (OpVTy (scalar_to_vector 5600 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))), 5601 (OpVTy FPRC:$Rn))), 5602 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; 5603} 5604 5605 5606// Patterns for Scalar Signed saturating doubling 5607// multiply long (scalar, by element) 5608defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, 5609 SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16, 5610 i32, VPR64Lo, neon_uimm2_bare>; 5611defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, 5612 SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16, 5613 i32, VPR128Lo, neon_uimm3_bare>; 5614defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, 5615 SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32, 5616 i32, VPR64Lo, neon_uimm1_bare>; 5617defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull, 5618 SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32, 5619 i32, VPR128Lo, neon_uimm2_bare>; 5620 5621// Scalar Signed 
// saturating doubling multiply-add long (scalar, by element)
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}

// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}

// Match (opnode Ra, (coreopnode Rn, vector_extract(MRm, Imm))) onto the
// indexed-element accumulating instruction; the second pattern covers the
// commuted multiply operands.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
              (ResTy (coreopnode (OpTy FPRC:$Rn),
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
              (ResTy (coreopnode
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                (OpTy FPRC:$Rn))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}

// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Scalar general arithmetic operation
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
        (INST FPR64:$Rn, FPR64:$Rm)>;

class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
                       (v1f64 FPR64:$Ra))),
        (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;

// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}

// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}

defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;

// Scalar Copy - DUP element to scalar
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
                       Operand OpImm>
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                     [],
                     NoItinerary> {
  bits<4> Imm;
}

// The lane index is encoded in Inst{20-16} shifted past a size marker bit.
def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
                                        ValueType OpTy, Operand OpImm,
                                        ValueType OpNTy, ValueType ExTy,
                                        Operand OpNImm> {
  def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  // 64-bit source: widen to a Q register first, then DUP the lane.
  def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

// Patterns for vector extract of FP data using scalar DUP instructions
defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
  v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
  v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
  ValueType ResTy, ValueType OpTy, Operand OpLImm,
  ValueType NOpTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;

  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
                                        v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
                                        v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
                                        v2i32, v4i32, neon_uimm1_bare>;

multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
              (neon_uimm0_bare:$Imm))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
              (OpNImm:$Imm))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
                                      v1i64, v2i64, i64, neon_uimm1_bare,
                                      v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
                                      v1i32, v4i32, i32, neon_uimm2_bare,
                                      v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
                                      v1i16, v8i16, i32, neon_uimm3_bare,
                                      v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
                                      v1i8, v16i8, i32, neon_uimm4_bare,
                                      v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
                                      v1f64, v2f64, f64, neon_uimm1_bare,
                                      v1f64, v2f64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
                                      v1f32, v4f32, f32, neon_uimm2_bare,
                                      v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
                                      v1i64, v2i64, i64, neon_uimm1_bare,
                                      v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
                                      v1i32, v4i32, i32, neon_uimm2_bare,
                                      v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
                                      v1i16, v8i16, i32, neon_uimm3_bare,
                                      v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
                                      v1i8, v16i8, i32, neon_uimm4_bare,
                                      v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
                                      v1f64, v2f64, f64, neon_uimm1_bare,
                                      v1f64, v2f64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
                                      v1f32, v4f32, f32, neon_uimm2_bare,
                                      v2f32, v4f32, neon_uimm1_bare>;

multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
                                  Instruction DUPI, Operand OpImm,
                                  RegisterClass ResRC> {
  def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
                      (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}

// Aliases for Scalar copy - DUP element (scalar)
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;

// Low half is a plain subregister copy; high half is DUPdv_D of lane 1.
multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
                      ValueType OpTy> {
  def : Pat<(ResTy (GetLow VPR128:$Rn)),
            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
}

defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// 64-bit vector bitcasts...

// All 64-bit vector types share a D register, so bitconvert is a no-op;
// map every cross-type cast onto the source register directly.
def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;

// ..and 128-bit vector bitcasts...

// 128-bit vector bitcasts are likewise register-preserving no-ops.
def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;

// ...and scalar bitcasts...
def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

// FP/SIMD register to GPR casts need an FMOV.
def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;

def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;

def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;

def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;

def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;

def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

// GPR to FP/SIMD register casts need an FMOV.
def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;

def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;

def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64
(bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 6148 6149// Scalar Three Same 6150 6151def neon_uimm3 : Operand<i64>, 6152 ImmLeaf<i64, [{return Imm < 8;}]> { 6153 let ParserMatchClass = uimm3_asmoperand; 6154 let PrintMethod = "printUImmHexOperand"; 6155} 6156 6157def neon_uimm4 : Operand<i64>, 6158 ImmLeaf<i64, [{return Imm < 16;}]> { 6159 let ParserMatchClass = uimm4_asmoperand; 6160 let PrintMethod = "printUImmHexOperand"; 6161} 6162 6163// Bitwise Extract 6164class NeonI_Extract<bit q, bits<2> op2, string asmop, 6165 string OpS, RegisterOperand OpVPR, Operand OpImm> 6166 : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd), 6167 (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index), 6168 asmop # "\t$Rd." # OpS # ", $Rn." # OpS # 6169 ", $Rm." # OpS # ", $Index", 6170 [], 6171 NoItinerary>{ 6172 bits<4> Index; 6173} 6174 6175def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", 6176 VPR64, neon_uimm3> { 6177 let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; 6178} 6179 6180def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", 6181 VPR128, neon_uimm4> { 6182 let Inst{14-11} = Index; 6183} 6184 6185class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST, 6186 Operand OpImm> 6187 : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), 6188 (i64 OpImm:$Imm))), 6189 (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; 6190 6191def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>; 6192def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>; 6193def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>; 6194def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>; 6195def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>; 6196def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>; 6197def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>; 6198def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>; 6199def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>; 6200def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>; 6201def : NI_Extract<v4f32, VPR128, 
EXTvvvi_16b, neon_uimm4>; 6202def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>; 6203 6204// Table lookup 6205class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op, 6206 string asmop, string OpS, RegisterOperand OpVPR, 6207 RegisterOperand VecList> 6208 : NeonI_TBL<q, op2, len, op, 6209 (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm), 6210 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS, 6211 [], 6212 NoItinerary>; 6213 6214// The vectors in look up table are always 16b 6215multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> { 6216 def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64, 6217 !cast<RegisterOperand>(List # "16B_operand")>; 6218 6219 def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128, 6220 !cast<RegisterOperand>(List # "16B_operand")>; 6221} 6222 6223defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">; 6224defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">; 6225defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">; 6226defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">; 6227 6228// Table lookup extention 6229class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op, 6230 string asmop, string OpS, RegisterOperand OpVPR, 6231 RegisterOperand VecList> 6232 : NeonI_TBL<q, op2, len, op, 6233 (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm), 6234 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." 
# OpS, 6235 [], 6236 NoItinerary> { 6237 let Constraints = "$src = $Rd"; 6238} 6239 6240// The vectors in look up table are always 16b 6241multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> { 6242 def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64, 6243 !cast<RegisterOperand>(List # "16B_operand")>; 6244 6245 def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128, 6246 !cast<RegisterOperand>(List # "16B_operand")>; 6247} 6248 6249defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">; 6250defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">; 6251defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">; 6252defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">; 6253 6254class NeonI_INS_main<string asmop, string Res, ValueType ResTy, 6255 RegisterClass OpGPR, ValueType OpTy, Operand OpImm> 6256 : NeonI_copy<0b1, 0b0, 0b0011, 6257 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm), 6258 asmop # "\t$Rd." # Res # "[$Imm], $Rn", 6259 [(set (ResTy VPR128:$Rd), 6260 (ResTy (vector_insert 6261 (ResTy VPR128:$src), 6262 (OpTy OpGPR:$Rn), 6263 (OpImm:$Imm))))], 6264 NoItinerary> { 6265 bits<4> Imm; 6266 let Constraints = "$src = $Rd"; 6267} 6268 6269//Insert element (vector, from main) 6270def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, 6271 neon_uimm4_bare> { 6272 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; 6273} 6274def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, 6275 neon_uimm3_bare> { 6276 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; 6277} 6278def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, 6279 neon_uimm2_bare> { 6280 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; 6281} 6282def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, 6283 neon_uimm1_bare> { 6284 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; 6285} 6286 6287def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn", 6288 (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>; 6289def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn", 6290 (INShw VPR128:$Rd, 
GPR32:$Rn, neon_uimm3_bare:$Imm), 0>; 6291def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn", 6292 (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>; 6293def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", 6294 (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; 6295 6296class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy, 6297 RegisterClass OpGPR, ValueType OpTy, 6298 Operand OpImm, Instruction INS> 6299 : Pat<(ResTy (vector_insert 6300 (ResTy VPR64:$src), 6301 (OpTy OpGPR:$Rn), 6302 (OpImm:$Imm))), 6303 (ResTy (EXTRACT_SUBREG 6304 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), 6305 OpGPR:$Rn, OpImm:$Imm)), sub_64))>; 6306 6307def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32, 6308 neon_uimm3_bare, INSbw>; 6309def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32, 6310 neon_uimm2_bare, INShw>; 6311def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32, 6312 neon_uimm1_bare, INSsw>; 6313def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64, 6314 neon_uimm0_bare, INSdx>; 6315 6316class NeonI_INS_element<string asmop, string Res, Operand ResImm> 6317 : NeonI_insert<0b1, 0b1, 6318 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, 6319 ResImm:$Immd, ResImm:$Immn), 6320 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", 6321 [], 6322 NoItinerary> { 6323 let Constraints = "$src = $Rd"; 6324 bits<4> Immd; 6325 bits<4> Immn; 6326} 6327 6328//Insert element (vector, from element) 6329def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> { 6330 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; 6331 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; 6332} 6333def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> { 6334 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; 6335 let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0}; 6336 // bit 11 is unspecified, but should be set to zero. 
6337} 6338def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> { 6339 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; 6340 let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0}; 6341 // bits 11-12 are unspecified, but should be set to zero. 6342} 6343def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> { 6344 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; 6345 let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0}; 6346 // bits 11-13 are unspecified, but should be set to zero. 6347} 6348 6349def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]", 6350 (INSELb VPR128:$Rd, VPR128:$Rn, 6351 neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>; 6352def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]", 6353 (INSELh VPR128:$Rd, VPR128:$Rn, 6354 neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>; 6355def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]", 6356 (INSELs VPR128:$Rd, VPR128:$Rn, 6357 neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>; 6358def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]", 6359 (INSELd VPR128:$Rd, VPR128:$Rn, 6360 neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>; 6361 6362multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy, 6363 ValueType MidTy, Operand StImm, Operand NaImm, 6364 Instruction INS> { 6365def : Pat<(ResTy (vector_insert 6366 (ResTy VPR128:$src), 6367 (MidTy (vector_extract 6368 (ResTy VPR128:$Rn), 6369 (StImm:$Immn))), 6370 (StImm:$Immd))), 6371 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn), 6372 StImm:$Immd, StImm:$Immn)>; 6373 6374def : Pat <(ResTy (vector_insert 6375 (ResTy VPR128:$src), 6376 (MidTy (vector_extract 6377 (NaTy VPR64:$Rn), 6378 (NaImm:$Immn))), 6379 (StImm:$Immd))), 6380 (INS (ResTy VPR128:$src), 6381 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), 6382 StImm:$Immd, NaImm:$Immn)>; 6383 6384def : Pat <(NaTy (vector_insert 6385 (NaTy VPR64:$src), 6386 (MidTy (vector_extract 6387 (ResTy VPR128:$Rn), 6388 (StImm:$Immn))), 6389 (NaImm:$Immd))), 6390 (NaTy (EXTRACT_SUBREG 6391 (ResTy (INS 6392 (ResTy 
(SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), 6393 (ResTy VPR128:$Rn), 6394 NaImm:$Immd, StImm:$Immn)), 6395 sub_64))>; 6396 6397def : Pat <(NaTy (vector_insert 6398 (NaTy VPR64:$src), 6399 (MidTy (vector_extract 6400 (NaTy VPR64:$Rn), 6401 (NaImm:$Immn))), 6402 (NaImm:$Immd))), 6403 (NaTy (EXTRACT_SUBREG 6404 (ResTy (INS 6405 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), 6406 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), 6407 NaImm:$Immd, NaImm:$Immn)), 6408 sub_64))>; 6409} 6410 6411defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare, 6412 neon_uimm1_bare, INSELs>; 6413defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare, 6414 neon_uimm0_bare, INSELd>; 6415defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare, 6416 neon_uimm3_bare, INSELb>; 6417defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare, 6418 neon_uimm2_bare, INSELh>; 6419defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare, 6420 neon_uimm1_bare, INSELs>; 6421defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare, 6422 neon_uimm0_bare, INSELd>; 6423 6424multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy, 6425 ValueType MidTy, 6426 RegisterClass OpFPR, Operand ResImm, 6427 SubRegIndex SubIndex, Instruction INS> { 6428def : Pat <(ResTy (vector_insert 6429 (ResTy VPR128:$src), 6430 (MidTy OpFPR:$Rn), 6431 (ResImm:$Imm))), 6432 (INS (ResTy VPR128:$src), 6433 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)), 6434 ResImm:$Imm, 6435 (i64 0))>; 6436 6437def : Pat <(NaTy (vector_insert 6438 (NaTy VPR64:$src), 6439 (MidTy OpFPR:$Rn), 6440 (ResImm:$Imm))), 6441 (NaTy (EXTRACT_SUBREG 6442 (ResTy (INS 6443 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), 6444 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), 6445 ResImm:$Imm, 6446 (i64 0))), 6447 sub_64))>; 6448} 6449 6450defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare, 6451 sub_32, INSELs>; 6452defm : 
Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare, 6453 sub_64, INSELd>; 6454 6455class NeonI_SMOV<string asmop, string Res, bit Q, 6456 ValueType OpTy, ValueType eleTy, 6457 Operand OpImm, RegisterClass ResGPR, ValueType ResTy> 6458 : NeonI_copy<Q, 0b0, 0b0101, 6459 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm), 6460 asmop # "\t$Rd, $Rn." # Res # "[$Imm]", 6461 [(set (ResTy ResGPR:$Rd), 6462 (ResTy (sext_inreg 6463 (ResTy (vector_extract 6464 (OpTy VPR128:$Rn), (OpImm:$Imm))), 6465 eleTy)))], 6466 NoItinerary> { 6467 bits<4> Imm; 6468} 6469 6470//Signed integer move (main, from element) 6471def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, 6472 GPR32, i32> { 6473 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; 6474} 6475def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, 6476 GPR32, i32> { 6477 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; 6478} 6479def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, 6480 GPR64, i64> { 6481 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; 6482} 6483def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, 6484 GPR64, i64> { 6485 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; 6486} 6487def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, 6488 GPR64, i64> { 6489 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; 6490} 6491 6492multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy, 6493 ValueType eleTy, Operand StImm, Operand NaImm, 6494 Instruction SMOVI> { 6495 def : Pat<(i64 (sext_inreg 6496 (i64 (anyext 6497 (i32 (vector_extract 6498 (StTy VPR128:$Rn), (StImm:$Imm))))), 6499 eleTy)), 6500 (SMOVI VPR128:$Rn, StImm:$Imm)>; 6501 6502 def : Pat<(i64 (sext 6503 (i32 (vector_extract 6504 (StTy VPR128:$Rn), (StImm:$Imm))))), 6505 (SMOVI VPR128:$Rn, StImm:$Imm)>; 6506 6507 def : Pat<(i64 (sext_inreg 6508 (i64 (vector_extract 6509 (NaTy VPR64:$Rn), (NaImm:$Imm))), 6510 eleTy)), 6511 (SMOVI 
(StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), 6512 NaImm:$Imm)>; 6513 6514 def : Pat<(i64 (sext_inreg 6515 (i64 (anyext 6516 (i32 (vector_extract 6517 (NaTy VPR64:$Rn), (NaImm:$Imm))))), 6518 eleTy)), 6519 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), 6520 NaImm:$Imm)>; 6521 6522 def : Pat<(i64 (sext 6523 (i32 (vector_extract 6524 (NaTy VPR64:$Rn), (NaImm:$Imm))))), 6525 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), 6526 NaImm:$Imm)>; 6527} 6528 6529defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare, 6530 neon_uimm3_bare, SMOVxb>; 6531defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare, 6532 neon_uimm2_bare, SMOVxh>; 6533defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare, 6534 neon_uimm1_bare, SMOVxs>; 6535 6536class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy, 6537 ValueType eleTy, Operand StImm, Operand NaImm, 6538 Instruction SMOVI> 6539 : Pat<(i32 (sext_inreg 6540 (i32 (vector_extract 6541 (NaTy VPR64:$Rn), (NaImm:$Imm))), 6542 eleTy)), 6543 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), 6544 NaImm:$Imm)>; 6545 6546def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare, 6547 neon_uimm3_bare, SMOVwb>; 6548def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare, 6549 neon_uimm2_bare, SMOVwh>; 6550 6551class NeonI_UMOV<string asmop, string Res, bit Q, 6552 ValueType OpTy, Operand OpImm, 6553 RegisterClass ResGPR, ValueType ResTy> 6554 : NeonI_copy<Q, 0b0, 0b0111, 6555 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm), 6556 asmop # "\t$Rd, $Rn." 
# Res # "[$Imm]",
                 // UMOV $Rd, $Rn.<t>[imm]: copy one vector lane into a GPR.
                 [(set (ResTy ResGPR:$Rd),
                    (ResTy (vector_extract
                      (OpTy VPR128:$Rn), (OpImm:$Imm))))],
                 NoItinerary> {
  bits<4> Imm;
}

//Unsigned integer move (main, from element)
// The lane index is packed into Inst{20-16} above a trailing size-marker bit
// (imm5 encoding): b -> {Imm,1}, h -> {Imm,1,0}, s -> {Imm,1,0,0},
// d -> {Imm,1,0,0,0}.
def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// "mov" is the preferred disassembly alias for the s/d-lane forms.
def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
                    (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
                    (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;

// Extract from a 64-bit (narrow, NaTy) vector: widen the source with
// SUBREG_TO_REG and reuse the 128-bit UMOV instruction.
// NOTE(review): the Instruction parameter is named SMOVI but is instantiated
// with UMOV instructions below; looks like a copy-paste from the SMOV
// pattern class. Harmless, but a rename would aid readability.
class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
                         Operand StImm,  Operand NaImm,
                         Instruction SMOVI>
  : Pat<(ResTy (vector_extract
          (NaTy VPR64:$Rn), NaImm:$Imm)),
        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                        neon_uimm3_bare, UMOVwb>;
def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                        neon_uimm2_bare, UMOVwh>;
def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                        neon_uimm1_bare, UMOVws>;

// Fold an explicit mask of an extracted b/h lane (and x, 255 / 65535)
// into the UMOV itself.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
            255)),
          (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
            65535)),
          (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;

// NOTE(review): this matches an i32-typed extract from v2i64 and selects the
// 64-bit UMOVxd; presumably matching a truncated d-lane extract that is then
// zero-extended back -- confirm the source DAG really takes this shape.
def : Pat<(i64 (zext
            (i32 (vector_extract
              (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
          (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;

// Same masked-extract folds for 64-bit source vectors: widen, then UMOV.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
            255)),
          (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm3_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
            65535)),
          (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm2_bare:$Imm)>;

def : Pat<(i64 (zext
            (i32 (vector_extract
              (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
          (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm0_bare:$Imm)>;

// Additional copy patterns for scalar types
// v1i8/v1i16 live in FPR8/FPR16; widen to the full 128-bit class first,
// then UMOV lane 0 out.
def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
          (UMOVwb (v16i8
            (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
          (UMOVwh (v8i16
            (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;

// 32/64-bit single-element extracts are plain FPR->GPR fmov copies.
def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
          (FMOVws FPR32:$Rn)>;

def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
          (FMOVxd FPR64:$Rn)>;

// FP single-element extracts are no-ops: the scalar aliases the vector reg.
def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
          (f64 FPR64:$Rn)>;

def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
          (f32 FPR32:$Rn)>;

// scalar_to_vector for sub-word elements: INS into lane 0 of an undef
// 128-bit register, then take the narrow subregister.
def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
          (v1i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_8))>;

def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
          (v1i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_16))>;

def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
          (FMOVsw $src)>;

def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
          (FMOVdx $src)>;

def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (v1f32 FPR32:$Rn)>;
// FP scalar_to_vector of a 1-element vector is a no-op register reuse.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (v1f64 FPR64:$Rn)>;

// NOTE(review): this pattern matches the same input DAG as the one directly
// above but emits an FMOVdd instead of reusing the register; one of the two
// is redundant -- confirm which takes precedence and consider removing it.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
          (FMOVdd $src)>;

// Place an f64 scalar into lane 0 of an otherwise-undef v2f64.
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                         (f64 FPR64:$src), sub_64)>;

// DUP (element): dup $Rd.<rdlane>, $Rn.<rnlane>[imm] -- broadcast one lane
// of $Rn across all lanes of $Rd. The source is always read as a 128-bit
// register; Q selects a 64- or 128-bit destination.
class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                    RegisterOperand ResVPR, Operand OpImm>
  : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
               (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
               [],
               NoItinerary> {
  bits<4> Imm;
}

// Lane index packed into Inst{20-16} above a size-marker bit (imm5):
// b -> {Imm,1}, h -> {Imm,1,0}, s -> {Imm,1,0,0}, d -> {Imm,1,0,0,0}.
def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
                             neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
                             neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}

def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
                             neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// 64-bit destination (Q = 0) variants.
def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
                             neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
                             neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
                             neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}

// Select DUPELT for Neon_vduplane, from either a 128-bit source (direct)
// or a 64-bit source (widen with SUBREG_TO_REG first).
multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
                                 ValueType OpTy, ValueType NaTy,
                                 ValueType ExTy, Operand OpLImm,
                                 Operand OpNImm> {
def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn),
OpLImm:$Imm)), 6738 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>; 6739 6740def : Pat<(ResTy (Neon_vduplane 6741 (NaTy VPR64:$Rn), OpNImm:$Imm)), 6742 (ResTy (DUPELT 6743 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; 6744} 6745defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8, 6746 neon_uimm4_bare, neon_uimm3_bare>; 6747defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8, 6748 neon_uimm4_bare, neon_uimm3_bare>; 6749defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16, 6750 neon_uimm3_bare, neon_uimm2_bare>; 6751defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16, 6752 neon_uimm3_bare, neon_uimm2_bare>; 6753defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32, 6754 neon_uimm2_bare, neon_uimm1_bare>; 6755defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32, 6756 neon_uimm2_bare, neon_uimm1_bare>; 6757defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64, 6758 neon_uimm1_bare, neon_uimm0_bare>; 6759defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32, 6760 neon_uimm2_bare, neon_uimm1_bare>; 6761defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32, 6762 neon_uimm2_bare, neon_uimm1_bare>; 6763defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64, 6764 neon_uimm1_bare, neon_uimm0_bare>; 6765 6766def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), 6767 (v2f32 (DUPELT2s 6768 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), 6769 (i64 0)))>; 6770def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), 6771 (v4f32 (DUPELT4s 6772 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), 6773 (i64 0)))>; 6774def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), 6775 (v2f64 (DUPELT2d 6776 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64), 6777 (i64 0)))>; 6778 6779class NeonI_DUP<bit Q, string asmop, string rdlane, 6780 RegisterOperand ResVPR, ValueType ResTy, 6781 RegisterClass OpGPR, ValueType OpTy> 6782 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn), 
asmop # "\t$Rd" # rdlane # ", $Rn",
             // DUP (general register): broadcast a GPR value to every lane.
             [(set (ResTy ResVPR:$Rd),
               (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
             NoItinerary>;

// Inst{20-16} is the imm5 size marker: 00001 = b, 00010 = h,
// 00100 = s, 01000 = d.
def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}

def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
  let Inst{20-16} = 0b01000;
  // bit 20 is unspecified, but should be set to zero.
}

def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}

// patterns for CONCAT_VECTORS
multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
// (Rn ++ undef): just widen Rn; the high half is left undefined.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
// (Rn ++ Rm): widen both halves, then insert Rm's d-lane 0 into lane 1.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
          (INSELd
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
            (i64 1),
            (i64 0))>;
// (Rn ++ Rn): one DUP of d-lane 0 fills both halves.
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
          (DUPELT2d
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (i64 0))> ;
}

defm : Concat_Vector_Pattern<v16i8, v8i8>;
defm : Concat_Vector_Pattern<v8i16, v4i16>;
defm : Concat_Vector_Pattern<v4i32, v2i32>;
defm : Concat_Vector_Pattern<v2i64, v1i64>;
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;

//patterns for EXTRACT_SUBVECTOR
// Only the low-half extract (index 0) is handled here; it is a plain
// subregister copy.
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;

// The followings are for instruction class (3V Elem)

// Variant 1

// By-element three-operand form with an accumulator ($src is tied to $Rd):
// <op> $Rd.<ResS>, $Rn.<OpS>, $Re.<EleOpS>[$Index]
class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
             string asmop, string ResS, string OpS, string EleOpS,
             Operand OpImm, RegisterOperand ResVPR,
             RegisterOperand OpVPR, RegisterOperand
EleOpVPR> 6868 : NeonI_2VElem<q, u, size, opcode, 6869 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn, 6870 EleOpVPR:$Re, OpImm:$Index), 6871 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # 6872 ", $Re." # EleOpS # "[$Index]", 6873 [], 6874 NoItinerary> { 6875 bits<3> Index; 6876 bits<5> Re; 6877 6878 let Constraints = "$src = $Rd"; 6879} 6880 6881multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> { 6882 // vector register class for element is always 128-bit to cover the max index 6883 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", 6884 neon_uimm2_bare, VPR64, VPR64, VPR128> { 6885 let Inst{11} = {Index{1}}; 6886 let Inst{21} = {Index{0}}; 6887 let Inst{20-16} = Re; 6888 } 6889 6890 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", 6891 neon_uimm2_bare, VPR128, VPR128, VPR128> { 6892 let Inst{11} = {Index{1}}; 6893 let Inst{21} = {Index{0}}; 6894 let Inst{20-16} = Re; 6895 } 6896 6897 // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
6898 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", 6899 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { 6900 let Inst{11} = {Index{2}}; 6901 let Inst{21} = {Index{1}}; 6902 let Inst{20} = {Index{0}}; 6903 let Inst{19-16} = Re{3-0}; 6904 } 6905 6906 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", 6907 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { 6908 let Inst{11} = {Index{2}}; 6909 let Inst{21} = {Index{1}}; 6910 let Inst{20} = {Index{0}}; 6911 let Inst{19-16} = Re{3-0}; 6912 } 6913} 6914 6915defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; 6916defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; 6917 6918// Pattern for lane in 128-bit vector 6919class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, 6920 RegisterOperand ResVPR, RegisterOperand OpVPR, 6921 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy, 6922 ValueType EleOpTy> 6923 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), 6924 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 6925 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; 6926 6927// Pattern for lane in 64-bit vector 6928class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op, 6929 RegisterOperand ResVPR, RegisterOperand OpVPR, 6930 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy, 6931 ValueType EleOpTy> 6932 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), 6933 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 6934 (INST ResVPR:$src, OpVPR:$Rn, 6935 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; 6936 6937multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op> 6938{ 6939 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, 6940 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; 6941 6942 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, 6943 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; 6944 6945 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), 
neon_uimm3_bare, 6946 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; 6947 6948 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare, 6949 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; 6950 6951 // Index can only be half of the max value for lane in 64-bit vector 6952 6953 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, 6954 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; 6955 6956 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare, 6957 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; 6958} 6959 6960defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; 6961defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; 6962 6963class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode, 6964 string asmop, string ResS, string OpS, string EleOpS, 6965 Operand OpImm, RegisterOperand ResVPR, 6966 RegisterOperand OpVPR, RegisterOperand EleOpVPR> 6967 : NeonI_2VElem<q, u, size, opcode, 6968 (outs ResVPR:$Rd), (ins OpVPR:$Rn, 6969 EleOpVPR:$Re, OpImm:$Index), 6970 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # 6971 ", $Re." # EleOpS # "[$Index]", 6972 [], 6973 NoItinerary> { 6974 bits<3> Index; 6975 bits<5> Re; 6976} 6977 6978multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> { 6979 // vector register class for element is always 128-bit to cover the max index 6980 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", 6981 neon_uimm2_bare, VPR64, VPR64, VPR128> { 6982 let Inst{11} = {Index{1}}; 6983 let Inst{21} = {Index{0}}; 6984 let Inst{20-16} = Re; 6985 } 6986 6987 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", 6988 neon_uimm2_bare, VPR128, VPR128, VPR128> { 6989 let Inst{11} = {Index{1}}; 6990 let Inst{21} = {Index{0}}; 6991 let Inst{20-16} = Re; 6992 } 6993 6994 // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
6995 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", 6996 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { 6997 let Inst{11} = {Index{2}}; 6998 let Inst{21} = {Index{1}}; 6999 let Inst{20} = {Index{0}}; 7000 let Inst{19-16} = Re{3-0}; 7001 } 7002 7003 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", 7004 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { 7005 let Inst{11} = {Index{2}}; 7006 let Inst{21} = {Index{1}}; 7007 let Inst{20} = {Index{0}}; 7008 let Inst{19-16} = Re{3-0}; 7009 } 7010} 7011 7012defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; 7013defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; 7014defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; 7015 7016// Pattern for lane in 128-bit vector 7017class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, 7018 RegisterOperand OpVPR, RegisterOperand EleOpVPR, 7019 ValueType ResTy, ValueType OpTy, ValueType EleOpTy> 7020 : Pat<(ResTy (op (OpTy OpVPR:$Rn), 7021 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 7022 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; 7023 7024// Pattern for lane in 64-bit vector 7025class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op, 7026 RegisterOperand OpVPR, RegisterOperand EleOpVPR, 7027 ValueType ResTy, ValueType OpTy, ValueType EleOpTy> 7028 : Pat<(ResTy (op (OpTy OpVPR:$Rn), 7029 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 7030 (INST OpVPR:$Rn, 7031 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; 7032 7033multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> { 7034 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, 7035 op, VPR64, VPR128, v2i32, v2i32, v4i32>; 7036 7037 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, 7038 op, VPR128, VPR128, v4i32, v4i32, v4i32>; 7039 7040 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare, 7041 op, VPR64, 
VPR128Lo, v4i16, v4i16, v8i16>; 7042 7043 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare, 7044 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; 7045 7046 // Index can only be half of the max value for lane in 64-bit vector 7047 7048 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, 7049 op, VPR64, VPR64, v2i32, v2i32, v2i32>; 7050 7051 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare, 7052 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; 7053} 7054 7055defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; 7056defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; 7057defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; 7058 7059// Variant 2 7060 7061multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> { 7062 // vector register class for element is always 128-bit to cover the max index 7063 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", 7064 neon_uimm2_bare, VPR64, VPR64, VPR128> { 7065 let Inst{11} = {Index{1}}; 7066 let Inst{21} = {Index{0}}; 7067 let Inst{20-16} = Re; 7068 } 7069 7070 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", 7071 neon_uimm2_bare, VPR128, VPR128, VPR128> { 7072 let Inst{11} = {Index{1}}; 7073 let Inst{21} = {Index{0}}; 7074 let Inst{20-16} = Re; 7075 } 7076 7077 // _1d2d doesn't exist! 
7078 7079 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", 7080 neon_uimm1_bare, VPR128, VPR128, VPR128> { 7081 let Inst{11} = {Index{0}}; 7082 let Inst{21} = 0b0; 7083 let Inst{20-16} = Re; 7084 } 7085} 7086 7087defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; 7088defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; 7089 7090class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op, 7091 RegisterOperand OpVPR, RegisterOperand EleOpVPR, 7092 ValueType ResTy, ValueType OpTy, ValueType EleOpTy, 7093 SDPatternOperator coreop> 7094 : Pat<(ResTy (op (OpTy OpVPR:$Rn), 7095 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), 7096 (INST OpVPR:$Rn, 7097 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; 7098 7099multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> { 7100 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare, 7101 op, VPR64, VPR128, v2f32, v2f32, v4f32>; 7102 7103 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare, 7104 op, VPR128, VPR128, v4f32, v4f32, v4f32>; 7105 7106 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare, 7107 op, VPR128, VPR128, v2f64, v2f64, v2f64>; 7108 7109 // Index can only be half of the max value for lane in 64-bit vector 7110 7111 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare, 7112 op, VPR64, VPR64, v2f32, v2f32, v2f32>; 7113 7114 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare, 7115 op, VPR128, VPR64, v2f64, v2f64, v1f64, 7116 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; 7117} 7118 7119defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; 7120defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; 7121 7122def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))), 7123 (v2f32 VPR64:$Rn))), 7124 (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; 7125 7126def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup 
(f32 FPR32:$Re))), 7127 (v4f32 VPR128:$Rn))), 7128 (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; 7129 7130def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))), 7131 (v2f64 VPR128:$Rn))), 7132 (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>; 7133 7134// The followings are patterns using fma 7135// -ffp-contract=fast generates fma 7136 7137multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> { 7138 // vector register class for element is always 128-bit to cover the max index 7139 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", 7140 neon_uimm2_bare, VPR64, VPR64, VPR128> { 7141 let Inst{11} = {Index{1}}; 7142 let Inst{21} = {Index{0}}; 7143 let Inst{20-16} = Re; 7144 } 7145 7146 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", 7147 neon_uimm2_bare, VPR128, VPR128, VPR128> { 7148 let Inst{11} = {Index{1}}; 7149 let Inst{21} = {Index{0}}; 7150 let Inst{20-16} = Re; 7151 } 7152 7153 // _1d2d doesn't exist! 7154 7155 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", 7156 neon_uimm1_bare, VPR128, VPR128, VPR128> { 7157 let Inst{11} = {Index{0}}; 7158 let Inst{21} = 0b0; 7159 let Inst{20-16} = Re; 7160 } 7161} 7162 7163defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; 7164defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; 7165 7166// Pattern for lane in 128-bit vector 7167class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, 7168 RegisterOperand ResVPR, RegisterOperand OpVPR, 7169 ValueType ResTy, ValueType OpTy, 7170 SDPatternOperator coreop> 7171 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), 7172 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))), 7173 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; 7174 7175// Pattern for lane 0 7176class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op, 7177 RegisterOperand ResVPR, ValueType ResTy> 7178 : Pat<(ResTy (op (ResTy ResVPR:$Rn), 7179 (ResTy (Neon_vdup (f32 FPR32:$Re))), 7180 
(ResTy ResVPR:$src))), 7181 (INST ResVPR:$src, ResVPR:$Rn, 7182 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; 7183 7184// Pattern for lane in 64-bit vector 7185class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op, 7186 RegisterOperand ResVPR, RegisterOperand OpVPR, 7187 ValueType ResTy, ValueType OpTy, 7188 SDPatternOperator coreop> 7189 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), 7190 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), 7191 (INST ResVPR:$src, ResVPR:$Rn, 7192 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; 7193 7194// Pattern for lane in 64-bit vector 7195class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm, 7196 SDPatternOperator op, 7197 RegisterOperand ResVPR, RegisterOperand OpVPR, 7198 ValueType ResTy, ValueType OpTy, 7199 SDPatternOperator coreop> 7200 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), 7201 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), 7202 (INST ResVPR:$src, ResVPR:$Rn, 7203 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; 7204 7205 7206multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> { 7207 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), 7208 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, 7209 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; 7210 7211 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"), 7212 op, VPR64, v2f32>; 7213 7214 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), 7215 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, 7216 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; 7217 7218 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"), 7219 op, VPR128, v4f32>; 7220 7221 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), 7222 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, 7223 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; 7224 7225 // Index can only be half of the max value for lane in 64-bit vector 7226 7227 def : NI_2VEswap_lane<!cast<Instruction>(subop # 
"_2s4s"), 7228 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, 7229 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; 7230 7231 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), 7232 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, 7233 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; 7234} 7235 7236defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; 7237 7238// Pattern for lane 0 7239class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op, 7240 RegisterOperand ResVPR, ValueType ResTy> 7241 : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), 7242 (ResTy (Neon_vdup (f32 FPR32:$Re))), 7243 (ResTy ResVPR:$src))), 7244 (INST ResVPR:$src, ResVPR:$Rn, 7245 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; 7246 7247multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op> 7248{ 7249 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), 7250 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, 7251 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; 7252 7253 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"), 7254 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, 7255 BinOpFrag<(Neon_vduplane 7256 (fneg node:$LHS), node:$RHS)>>; 7257 7258 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"), 7259 op, VPR64, v2f32>; 7260 7261 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), 7262 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, 7263 BinOpFrag<(fneg (Neon_vduplane 7264 node:$LHS, node:$RHS))>>; 7265 7266 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"), 7267 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, 7268 BinOpFrag<(Neon_vduplane 7269 (fneg node:$LHS), node:$RHS)>>; 7270 7271 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"), 7272 op, VPR128, v4f32>; 7273 7274 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), 7275 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, 7276 BinOpFrag<(fneg (Neon_vduplane 7277 node:$LHS, node:$RHS))>>; 7278 7279 def : 
NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"), 7280 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, 7281 BinOpFrag<(Neon_vduplane 7282 (fneg node:$LHS), node:$RHS)>>; 7283 7284 // Index can only be half of the max value for lane in 64-bit vector 7285 7286 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"), 7287 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, 7288 BinOpFrag<(fneg (Neon_vduplane 7289 node:$LHS, node:$RHS))>>; 7290 7291 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"), 7292 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, 7293 BinOpFrag<(Neon_vduplane 7294 (fneg node:$LHS), node:$RHS)>>; 7295 7296 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"), 7297 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, 7298 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; 7299 7300 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"), 7301 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, 7302 BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; 7303 7304 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), 7305 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, 7306 BinOpFrag<(fneg (Neon_combine_2d 7307 node:$LHS, node:$RHS))>>; 7308 7309 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"), 7310 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, 7311 BinOpFrag<(Neon_combine_2d 7312 (fneg node:$LHS), (fneg node:$RHS))>>; 7313} 7314 7315defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; 7316 7317// Variant 3: Long type 7318// E.g. 
// SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S

multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                     neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                     neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;

// Same encodings for the two-operand (no accumulator) long multiplies.
multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
                         neon_uimm2_bare, VPR128, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
                         neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;

def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
          (FMOVdd $src)>;
def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))),
          (FMOVss $src)>;

// Pattern for lane in 128-bit vector
class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                     RegisterOperand EleOpVPR, ValueType ResTy,
                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
                   (HalfOpTy (hiop (OpTy VPR128:$Rn))),
                   (HalfOpTy (Neon_vduplane
                               (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;

// Pattern for lane in 64-bit vector
class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                    RegisterOperand EleOpVPR, ValueType ResTy,
                    ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                    SDPatternOperator hiop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
                   (HalfOpTy (hiop (OpTy VPR128:$Rn))),
                   (HalfOpTy (Neon_vduplane
                               (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn,
         (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;

// Pattern for fixed lane 0: the element comes straight from a GPR dup.
class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
                     ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop, Instruction DupInst>
  : Pat<(ResTy (op (ResTy VPR128:$src),
                   (HalfOpTy (hiop (OpTy VPR128:$Rn))),
                   (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
        (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;

multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
                     op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
                     op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
                       op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;

  def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
                       op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
                       op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;

  def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
                       op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
                    op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
                    op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;

  def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
                      op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;

  def :
NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, 7467 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; 7468} 7469 7470defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; 7471defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; 7472defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; 7473defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; 7474 7475// Pattern for lane in 128-bit vector 7476class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op, 7477 RegisterOperand EleOpVPR, ValueType ResTy, 7478 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, 7479 SDPatternOperator hiop> 7480 : Pat<(ResTy (op 7481 (HalfOpTy (hiop (OpTy VPR128:$Rn))), 7482 (HalfOpTy (Neon_vduplane 7483 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 7484 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; 7485 7486// Pattern for lane in 64-bit vector 7487class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op, 7488 RegisterOperand EleOpVPR, ValueType ResTy, 7489 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy, 7490 SDPatternOperator hiop> 7491 : Pat<(ResTy (op 7492 (HalfOpTy (hiop (OpTy VPR128:$Rn))), 7493 (HalfOpTy (Neon_vduplane 7494 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), 7495 (INST VPR128:$Rn, 7496 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; 7497 7498// Pattern for fixed lane 0 7499class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op, 7500 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy, 7501 SDPatternOperator hiop, Instruction DupInst> 7502 : Pat<(ResTy (op 7503 (HalfOpTy (hiop (OpTy VPR128:$Rn))), 7504 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), 7505 (INST VPR128:$Rn, (DupInst $Re), 0)>; 7506 7507multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> { 7508 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare, 7509 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; 7510 7511 def : 
NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare, 7512 op, VPR64, VPR128, v2i64, v2i32, v4i32>; 7513 7514 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare, 7515 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; 7516 7517 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare, 7518 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; 7519 7520 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"), 7521 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; 7522 7523 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"), 7524 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; 7525 7526 // Index can only be half of the max value for lane in 64-bit vector 7527 7528 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare, 7529 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; 7530 7531 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare, 7532 op, VPR64, VPR64, v2i64, v2i32, v2i32>; 7533 7534 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare, 7535 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; 7536 7537 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, 7538 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; 7539} 7540 7541defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; 7542defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; 7543defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; 7544 7545multiclass NI_qdma<SDPatternOperator op> { 7546 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), 7547 (op node:$Ra, 7548 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; 7549 7550 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), 7551 (op node:$Ra, 7552 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; 7553} 7554 7555defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>; 7556defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>; 7557 
7558multiclass NI_2VEL_v3_qdma_pat<string subop, string op> { 7559 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare, 7560 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo, 7561 v4i32, v4i16, v8i16>; 7562 7563 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare, 7564 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128, 7565 v2i64, v2i32, v4i32>; 7566 7567 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare, 7568 !cast<PatFrag>(op # "_4s"), VPR128Lo, 7569 v4i32, v8i16, v8i16, v4i16, Neon_High8H>; 7570 7571 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare, 7572 !cast<PatFrag>(op # "_2d"), VPR128, 7573 v2i64, v4i32, v4i32, v2i32, Neon_High4S>; 7574 7575 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"), 7576 !cast<PatFrag>(op # "_4s"), 7577 v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; 7578 7579 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"), 7580 !cast<PatFrag>(op # "_2d"), 7581 v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; 7582 7583 // Index can only be half of the max value for lane in 64-bit vector 7584 7585 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare, 7586 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo, 7587 v4i32, v4i16, v4i16>; 7588 7589 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare, 7590 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64, 7591 v2i64, v2i32, v2i32>; 7592 7593 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare, 7594 !cast<PatFrag>(op # "_4s"), VPR64Lo, 7595 v4i32, v8i16, v4i16, v4i16, Neon_High8H>; 7596 7597 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare, 7598 !cast<PatFrag>(op # "_2d"), VPR64, 7599 v2i64, v4i32, v2i32, v2i32, Neon_High4S>; 7600} 7601 7602defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; 7603defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; 7604 7605// End of implementation for 
// instruction class (3V Elem)

class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
                bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
                SDPatternOperator Neon_Rev>
  : NeonI_2VMisc<Q, U, size, opcode,
                 (outs ResVPR:$Rd), (ins ResVPR:$Rn),
                 asmop # "\t$Rd." # Res # ", $Rn." # Res,
                 [(set (ResTy ResVPR:$Rd),
                    (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
                 NoItinerary>;

def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
                          v16i8, Neon_rev64>;
def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
                         v8i16, Neon_rev64>;
def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
                         v4i32, Neon_rev64>;
def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
                         v8i8, Neon_rev64>;
def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
                         v4i16, Neon_rev64>;
def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
                         v2i32, Neon_rev64>;

def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;

def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
                          v16i8, Neon_rev32>;
def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
                         v8i16, Neon_rev32>;
def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
                         v8i8, Neon_rev32>;
def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
                         v4i16, Neon_rev32>;

def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
                          v16i8, Neon_rev16>;
def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
                         v8i8, Neon_rev16>;

// Pairwise long add: adds adjacent element pairs into a double-width result.
multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
                             SDPatternOperator Neon_Padd> {
  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.8h, $Rn.16b",
                           [(set (v8i16 VPR128:$Rd),
                              (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
                           NoItinerary>;

  def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.8b",
                          [(set (v4i16 VPR64:$Rd),
                             (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
                          NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4s, $Rn.8h",
                          [(set (v4i32 VPR128:$Rd),
                             (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
                          NoItinerary>;

  def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.4h",
                          [(set (v2i32 VPR64:$Rd),
                             (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
                          NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2d, $Rn.4s",
                          [(set (v2i64 VPR128:$Rd),
                             (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
                          NoItinerary>;

  def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.1d, $Rn.2s",
                          [(set (v1i64 VPR64:$Rd),
                             (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
                          NoItinerary>;
}

defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
                                int_arm_neon_vpaddls>;
defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
                                int_arm_neon_vpaddlu>;

// Pairwise long add-accumulate: like above but accumulates into $src.
multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Padd> {
  let Constraints = "$src = $Rd" in {
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "\t$Rd.8h, $Rn.16b",
                             [(set (v8i16 VPR128:$Rd),
                                (v8i16 (Neon_Padd
                                  (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
                             NoItinerary>;

    def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.4h, $Rn.8b",
                            [(set (v4i16 VPR64:$Rd),
                               (v4i16 (Neon_Padd
                                 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
                            NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.4s, $Rn.8h",
                            [(set (v4i32 VPR128:$Rd),
                               (v4i32 (Neon_Padd
                                 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
                            NoItinerary>;

    def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.2s, $Rn.4h",
                            [(set (v2i32 VPR64:$Rd),
                               (v2i32 (Neon_Padd
                                 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
                            NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.2d, $Rn.4s",
                            [(set (v2i64 VPR128:$Rd),
                               (v2i64 (Neon_Padd
                                 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
                            NoItinerary>;

    def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                            asmop # "\t$Rd.1d, $Rn.2s",
                            [(set (v1i64 VPR64:$Rd),
                               (v1i64 (Neon_Padd
                                 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
                            NoItinerary>;
  }
}

defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
                                   int_arm_neon_vpadals>;
defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
                                   int_arm_neon_vpadalu>;

// One-operand B/H/S/D-size instructions; selection patterns are added
// separately via NeonI_2VMisc_BHSD_1Arg_Pattern.
multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [], NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [], NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U,
0b11, opcode, 7772 (outs VPR128:$Rd), (ins VPR128:$Rn), 7773 asmop # "\t$Rd.2d, $Rn.2d", 7774 [], NoItinerary>; 7775 7776 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, 7777 (outs VPR64:$Rd), (ins VPR64:$Rn), 7778 asmop # "\t$Rd.8b, $Rn.8b", 7779 [], NoItinerary>; 7780 7781 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, 7782 (outs VPR64:$Rd), (ins VPR64:$Rn), 7783 asmop # "\t$Rd.4h, $Rn.4h", 7784 [], NoItinerary>; 7785 7786 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, 7787 (outs VPR64:$Rd), (ins VPR64:$Rn), 7788 asmop # "\t$Rd.2s, $Rn.2s", 7789 [], NoItinerary>; 7790} 7791 7792defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; 7793defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; 7794defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; 7795defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; 7796 7797multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix, 7798 SDPatternOperator Neon_Op> { 7799 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), 7800 (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>; 7801 7802 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))), 7803 (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>; 7804 7805 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), 7806 (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>; 7807 7808 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), 7809 (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>; 7810 7811 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), 7812 (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>; 7813 7814 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), 7815 (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>; 7816 7817 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), 7818 (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>; 7819} 7820 7821defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; 7822defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; 7823defm : 
NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; 7824 7825def : Pat<(v16i8 (sub 7826 (v16i8 Neon_AllZero), 7827 (v16i8 VPR128:$Rn))), 7828 (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; 7829def : Pat<(v8i8 (sub 7830 (v8i8 Neon_AllZero), 7831 (v8i8 VPR64:$Rn))), 7832 (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; 7833def : Pat<(v8i16 (sub 7834 (v8i16 (bitconvert (v16i8 Neon_AllZero))), 7835 (v8i16 VPR128:$Rn))), 7836 (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; 7837def : Pat<(v4i16 (sub 7838 (v4i16 (bitconvert (v8i8 Neon_AllZero))), 7839 (v4i16 VPR64:$Rn))), 7840 (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; 7841def : Pat<(v4i32 (sub 7842 (v4i32 (bitconvert (v16i8 Neon_AllZero))), 7843 (v4i32 VPR128:$Rn))), 7844 (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; 7845def : Pat<(v2i32 (sub 7846 (v2i32 (bitconvert (v8i8 Neon_AllZero))), 7847 (v2i32 VPR64:$Rn))), 7848 (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; 7849def : Pat<(v2i64 (sub 7850 (v2i64 (bitconvert (v16i8 Neon_AllZero))), 7851 (v2i64 VPR128:$Rn))), 7852 (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; 7853 7854multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> { 7855 let Constraints = "$src = $Rd" in { 7856 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, 7857 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 7858 asmop # "\t$Rd.16b, $Rn.16b", 7859 [], NoItinerary>; 7860 7861 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, 7862 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 7863 asmop # "\t$Rd.8h, $Rn.8h", 7864 [], NoItinerary>; 7865 7866 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, 7867 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 7868 asmop # "\t$Rd.4s, $Rn.4s", 7869 [], NoItinerary>; 7870 7871 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, 7872 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 7873 asmop # "\t$Rd.2d, $Rn.2d", 7874 [], NoItinerary>; 7875 7876 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, 7877 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), 7878 asmop # "\t$Rd.8b, $Rn.8b", 7879 [], NoItinerary>; 7880 7881 def 4h : NeonI_2VMisc<0b0, U, 
0b01, opcode, 7882 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), 7883 asmop # "\t$Rd.4h, $Rn.4h", 7884 [], NoItinerary>; 7885 7886 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, 7887 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), 7888 asmop # "\t$Rd.2s, $Rn.2s", 7889 [], NoItinerary>; 7890 } 7891} 7892 7893defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; 7894defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; 7895 7896multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix, 7897 SDPatternOperator Neon_Op> { 7898 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), 7899 (v16i8 (!cast<Instruction>(Prefix # 16b) 7900 (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; 7901 7902 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), 7903 (v8i16 (!cast<Instruction>(Prefix # 8h) 7904 (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; 7905 7906 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))), 7907 (v4i32 (!cast<Instruction>(Prefix # 4s) 7908 (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; 7909 7910 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), 7911 (v2i64 (!cast<Instruction>(Prefix # 2d) 7912 (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; 7913 7914 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), 7915 (v8i8 (!cast<Instruction>(Prefix # 8b) 7916 (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; 7917 7918 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), 7919 (v4i16 (!cast<Instruction>(Prefix # 4h) 7920 (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; 7921 7922 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), 7923 (v2i32 (!cast<Instruction>(Prefix # 2s) 7924 (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; 7925} 7926 7927defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; 7928defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; 7929 7930multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U, 7931 SDPatternOperator Neon_Op> { 
7932 def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, 7933 (outs VPR128:$Rd), (ins VPR128:$Rn), 7934 asmop # "\t$Rd.16b, $Rn.16b", 7935 [(set (v16i8 VPR128:$Rd), 7936 (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], 7937 NoItinerary>; 7938 7939 def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, 7940 (outs VPR128:$Rd), (ins VPR128:$Rn), 7941 asmop # "\t$Rd.8h, $Rn.8h", 7942 [(set (v8i16 VPR128:$Rd), 7943 (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], 7944 NoItinerary>; 7945 7946 def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, 7947 (outs VPR128:$Rd), (ins VPR128:$Rn), 7948 asmop # "\t$Rd.4s, $Rn.4s", 7949 [(set (v4i32 VPR128:$Rd), 7950 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], 7951 NoItinerary>; 7952 7953 def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, 7954 (outs VPR64:$Rd), (ins VPR64:$Rn), 7955 asmop # "\t$Rd.8b, $Rn.8b", 7956 [(set (v8i8 VPR64:$Rd), 7957 (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], 7958 NoItinerary>; 7959 7960 def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, 7961 (outs VPR64:$Rd), (ins VPR64:$Rn), 7962 asmop # "\t$Rd.4h, $Rn.4h", 7963 [(set (v4i16 VPR64:$Rd), 7964 (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], 7965 NoItinerary>; 7966 7967 def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, 7968 (outs VPR64:$Rd), (ins VPR64:$Rn), 7969 asmop # "\t$Rd.2s, $Rn.2s", 7970 [(set (v2i32 VPR64:$Rd), 7971 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], 7972 NoItinerary>; 7973} 7974 7975defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; 7976defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; 7977 7978multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size, 7979 bits<5> Opcode> { 7980 def 16b : NeonI_2VMisc<0b1, U, size, Opcode, 7981 (outs VPR128:$Rd), (ins VPR128:$Rn), 7982 asmop # "\t$Rd.16b, $Rn.16b", 7983 [], NoItinerary>; 7984 7985 def 8b : NeonI_2VMisc<0b0, U, size, Opcode, 7986 (outs VPR64:$Rd), (ins VPR64:$Rn), 7987 asmop # "\t$Rd.8b, $Rn.8b", 7988 [], NoItinerary>; 7989} 7990 7991defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; 7992defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 
0b00101>; 7993defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; 7994 7995def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", 7996 (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; 7997def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", 7998 (NOT8b VPR64:$Rd, VPR64:$Rn), 0>; 7999 8000def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), 8001 (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; 8002def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), 8003 (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; 8004 8005def : Pat<(v16i8 (xor 8006 (v16i8 VPR128:$Rn), 8007 (v16i8 Neon_AllOne))), 8008 (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; 8009def : Pat<(v8i8 (xor 8010 (v8i8 VPR64:$Rn), 8011 (v8i8 Neon_AllOne))), 8012 (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; 8013def : Pat<(v8i16 (xor 8014 (v8i16 VPR128:$Rn), 8015 (v8i16 (bitconvert (v16i8 Neon_AllOne))))), 8016 (NOT16b VPR128:$Rn)>; 8017def : Pat<(v4i16 (xor 8018 (v4i16 VPR64:$Rn), 8019 (v4i16 (bitconvert (v8i8 Neon_AllOne))))), 8020 (NOT8b VPR64:$Rn)>; 8021def : Pat<(v4i32 (xor 8022 (v4i32 VPR128:$Rn), 8023 (v4i32 (bitconvert (v16i8 Neon_AllOne))))), 8024 (NOT16b VPR128:$Rn)>; 8025def : Pat<(v2i32 (xor 8026 (v2i32 VPR64:$Rn), 8027 (v2i32 (bitconvert (v8i8 Neon_AllOne))))), 8028 (NOT8b VPR64:$Rn)>; 8029def : Pat<(v2i64 (xor 8030 (v2i64 VPR128:$Rn), 8031 (v2i64 (bitconvert (v16i8 Neon_AllOne))))), 8032 (NOT16b VPR128:$Rn)>; 8033 8034def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), 8035 (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; 8036def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), 8037 (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; 8038 8039multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode, 8040 SDPatternOperator Neon_Op> { 8041 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, 8042 (outs VPR128:$Rd), (ins VPR128:$Rn), 8043 asmop # "\t$Rd.4s, $Rn.4s", 8044 [(set (v4f32 VPR128:$Rd), 8045 (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], 8046 NoItinerary>; 8047 8048 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, 8049 (outs VPR128:$Rd), (ins VPR128:$Rn), 8050 asmop # "\t$Rd.2d, $Rn.2d", 8051 [(set 
(v2f64 VPR128:$Rd), 8052 (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], 8053 NoItinerary>; 8054 8055 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, 8056 (outs VPR64:$Rd), (ins VPR64:$Rn), 8057 asmop # "\t$Rd.2s, $Rn.2s", 8058 [(set (v2f32 VPR64:$Rd), 8059 (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], 8060 NoItinerary>; 8061} 8062 8063defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; 8064defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; 8065 8066multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> { 8067 def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, 8068 (outs VPR64:$Rd), (ins VPR128:$Rn), 8069 asmop # "\t$Rd.8b, $Rn.8h", 8070 [], NoItinerary>; 8071 8072 def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, 8073 (outs VPR64:$Rd), (ins VPR128:$Rn), 8074 asmop # "\t$Rd.4h, $Rn.4s", 8075 [], NoItinerary>; 8076 8077 def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, 8078 (outs VPR64:$Rd), (ins VPR128:$Rn), 8079 asmop # "\t$Rd.2s, $Rn.2d", 8080 [], NoItinerary>; 8081 8082 let Constraints = "$Rd = $src" in { 8083 def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, 8084 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 8085 asmop # "2\t$Rd.16b, $Rn.8h", 8086 [], NoItinerary>; 8087 8088 def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, 8089 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 8090 asmop # "2\t$Rd.8h, $Rn.4s", 8091 [], NoItinerary>; 8092 8093 def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, 8094 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 8095 asmop # "2\t$Rd.4s, $Rn.2d", 8096 [], NoItinerary>; 8097 } 8098} 8099 8100defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; 8101defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; 8102defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; 8103defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; 8104 8105multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix, 8106 SDPatternOperator Neon_Op> { 8107 def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), 8108 (v8i8 (!cast<Instruction>(Prefix # 
8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  // The "2" (second-part) narrowing forms: the narrowed result is written
  // into the high half while the low half comes from $src, expressed here
  // as a concat_vectors of the existing low half and the narrowed value.
  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}

// Instantiate the narrowing patterns for the integer narrow instructions.
defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;

// Shift-left-long instructions (SHLL/SHLL2): widen each element and shift
// left by an exact immediate equal to the source element width.
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  let DecoderMethod = "DecodeSHLLInstruction" in {
  def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact8:$Imm),
                          asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                          [], NoItinerary>;

  def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact16:$Imm),
                          asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR64:$Rn, uimm_exact32:$Imm),
                          asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                          [], NoItinerary>;

  // SHLL2 variants (Q = 0b1) read the high half of the 128-bit source.
  def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd),
                           (ins VPR128:$Rn, uimm_exact8:$Imm),
                           asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                           [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR128:$Rn, uimm_exact16:$Imm),
                          asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd),
                          (ins VPR128:$Rn, uimm_exact32:$Imm),
                          asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                          [], NoItinerary>;
  }
}

defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;

// Select SHLL for (ext x) << (element-width splat) on 64-bit sources.
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;

// Same selection, but the source is the high half of a 128-bit register
// (GetHigh extracts it), mapping onto the SHLL2 forms.
class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
                               SDPatternOperator ExtOp, Operand Neon_Imm,
                               string suffix, PatFrag GetHigh>
  : Pat<(DesTy (shl
          (DesTy (ExtOp
            (OpTy (GetHigh VPR128:$Rn)))),
          (DesTy (Neon_vdup
            (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;

// SHLL is used for both signed and unsigned extension (it shifts in zeros,
// which matches either ext when shifting by a full element width).
def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;

// Floating-point narrowing (FCVTN/FCVTN2): 4s->4h and 2d->2s, with the "2"
// forms tying $src so the low half of the destination is preserved.
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;

// Selection patterns for the FP narrowing conversions: f32->f16 (as i16
// elements) and f64->f32, including the high-half concat forms.
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                      (v4i16 VPR64:$src),
                      (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s8h")
              (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
              (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
                      (v2f32 VPR64:$src),
                      (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
              (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
              (v2f64 VPR128:$Rn))>;
}

defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;

// FCVTXN/FCVTXN2: double-to-single narrowing with round-to-odd; only the
// 2d->2s shapes exist, so patterns are defined inline with the instructions.
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  def : Pat<(v4f32 (concat_vectors
                      (v2f32 VPR64:$src),
                      (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
              (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
              VPR128:$Rn)>;
}

defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;

// Extract the high two floats of a v4f32 (elements 2 and 3).
def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;

// Floating-point lengthening (FCVTL/FCVTL2): 4h->4s and 2s->2d.
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}

defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;

// Selection patterns for the lengthening conversions, including the "2"
// forms that read the high half via Neon_High8H / Neon_High4Float.
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
                      (v4i16 (Neon_High8H
                        (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
                      (v2f32 (Neon_High4Float
                        (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}

defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;

// Generic single/double element-wise conversion shapes: 4s, 2d and 2s forms.
// The Size bit combines with a fixed low bit to form the size field.
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}

// FP -> integer conversions (result types i32/i64 vectors).
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}

defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_aarch64_neon_fcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_aarch64_neon_fcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_aarch64_neon_fcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_aarch64_neon_fcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_aarch64_neon_fcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_aarch64_neon_fcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_aarch64_neon_fcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_aarch64_neon_fcvtau>;

// Integer -> FP conversions (result types f32/f64 vectors).
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}

defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;

// FP -> FP element-wise operations (rounding, reciprocal estimates, sqrt).
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}

defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                    int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;

// Unsigned integer estimate operations (URECPE/URSQRTE): 32-bit elements
// only, so just the 4s and 2s shapes exist.
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}

defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;

// Crypto Class
// AES round instructions: two-operand, destructive ($src tied to $Rd).
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary>{
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;

// AES mix-columns instructions: pure unary, no tied operand.
class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;

def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;

// SHA schedule-update instructions operating on two 4s vectors,
// destructive on $Rd.
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;

// SHA1H: scalar 32-bit in, scalar 32-bit out.
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [(set (v1i32 FPR32:$Rd),
                        (v1i32 (opnode (v1i32 FPR32:$Rn))))],
                     NoItinerary> {
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;

// Three-vector SHA schedule updates (all operands 4s vectors).
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;

// SHA256 hash-update instructions: two q-register operands plus a 4s vector.
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;

// SHA1 hash-update instructions: q register, scalar s register, 4s vector.
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v1i32 FPR32:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;

//
// Patterns for handling half-precision values
//

// Convert f16 value coming in as i16 value to f32
def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;

// f32 -> f16 -> f32 round-trip folds to the original f32 value.
def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
            f32_to_f16 (f32 FPR32:$Rn))))))),
          (f32 FPR32:$Rn)>;

// Patterns for vector extract of half-precision FP value in i16 storage type
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H
            (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            neon_uimm2_bare:$Imm)))>;

def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;

// Patterns for vector insert of half-precision FP value 0 in i16 storage type
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;

// Patterns for vector insert of half-precision FP value in i16 storage type
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;

def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

// Patterns for vector copy of half-precision FP value in i16 storage type
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

// NOTE(review): this v4i16 pattern uses neon_uimm3_bare for the extract
// index of a 4-element vector; neon_uimm2_bare would look more natural —
// kept as-is to preserve behavior, but worth confirming upstream.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
              neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
            sub_64))>;