// X86InstrSSE.td -- LLVM revision 263508
//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Itineraries for an instruction's register-register (rr) and
// register-memory (rm) forms, plus its InstrSchedModel write class.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  // InstrSchedModel info.
  X86FoldableSchedWrite Sched = WriteFAdd;
}

// Pairs the single-precision (s) and double-precision (d) itineraries
// of one operation.
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}

// Itineraries for shift instructions, which also have a
// register-immediate (ri) form.
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}

// scalar
let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
}

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

let Sched = WriteFMul in {
// Note: the rm itinerary previously used the F64S class by copy-paste
// mistake; it must match the single-precision rr form.
def SSE_MUL_F32S : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
}

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

let Sched = WriteFDiv in {
// Note: rm itinerary corrected from the F64S class (copy-paste error).
def SSE_DIV_F32S : OpndItins<
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
}

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
}

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

let Sched = WriteFMul in {
// Note: rm itinerary corrected from the F64P class (copy-paste error).
def SSE_MUL_F32P : OpndItins<
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
}

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

let Sched = WriteFDiv in {
// Note: rm itinerary corrected from the F64P class (copy-paste error).
def SSE_DIV_F32P : OpndItins<
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
}

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
}

let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;

def SSE_DPPD_ITINS : OpndItins<
  IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
>;

// Note: rm itinerary corrected from IIC_SSE_DPPD_RM (copy-paste error);
// the memory form of DPPS must use the DPPS itinerary.
def SSE_DPPS_ITINS : OpndItins<
  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
>;

def DEFAULT_ITINS : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

def SSE_EXTRACT_ITINS : OpndItins<
  IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
>;

def SSE_INSERT_ITINS : OpndItins<
  IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
>;
def SSE_MPSADBW_ITINS : OpndItins<
  IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;

def SSE_PMULLD_ITINS : OpndItins<
  IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class.
/// Emits an rr and an rm form; Is2Addr selects the two-address SSE assembly
/// string rather than the three-operand AVX one.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class.
/// The intrinsic name is assembled as int_x86_sse<SSEVer>_<OpcodeStr><FPSizeStr>.
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               OpndItins itins,
                               bit Is2Addr = 1> {
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       Sched<[itins.Sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          itins.rm, d>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class.
/// Selection patterns are supplied by the caller via pat_rr / pat_rm.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, NoItinerary, d>,
       Sched<[WriteVecLogic]>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, NoItinerary, d>,
       Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

// All other 128-bit all-zeros vector types select the same V_SET0 pseudo.
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}

let Predicates = [HasAVX] in
  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;

// 256-bit integer all-zeros only map to AVX_SET0 when AVX2 integer ops exist.
let Predicates = [HasAVX2] in {
  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}

// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

/// sse12_move_rr - register-to-register MOVSS/MOVSD forms. Merges the scalar
/// RC:$src2 into the low element of VR128:$src1 via OpNode.
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr> {
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, RC:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1,
                                     (scalar_to_vector RC:$src2))))],
              IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;

  // For the disassembler
  let isCodeGenOnly = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, RC:$src2),
                  !strconcat(base_opc, asm_opr),
                  [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}

/// sse12_move - instantiates both the AVX (V-prefixed, three-operand) and the
/// SSE (two-address) register and store forms of a scalar move.
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr> {
  // AVX
  defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                              VEX_4V, VEX_LIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                     VEX, VEX_LIG, Sched<[WriteStore]>;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}">;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                   Sched<[WriteStore]>;
}

// Loading from memory automatically zeroing upper bits.
/// sse12_move_rm - load forms of MOVSS/MOVSD (AVX and SSE variants).
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_pat, string OpcodeStr> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (mem_pat addr:$src))],
                     IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
  def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (mem_pat addr:$src))],
                   IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;

  let AddedComplexity = 20 in
    defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVS{S,D} to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSrr (v4f32 (V_SET0)),
                       (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSrr (v4i32 (V_SET0)),
                       (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
  }
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDrr (v2f64 (V_SET0)),
                       (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDrr (v2i64 (V_SET0)),
                       (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;

  // 256-bit variants
  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
              sub_xmm)>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;

  // 256-bit variants
  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
              sub_xmm)>;


  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
  // is during lowering, where it's not possible to recognize the fold cause
  // it has two uses through a bitcast. One use disappears at isel time and the
  // fold opportunity reappears.
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}

let Predicates = [UseSSE1] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm already zeros the high parts of the register.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;

  // Shuffle with MOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}

let Predicates = [UseSSE2] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSD to the lower bits.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
  // MOVSDrm already zeros the high parts of the register.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  }

  // Extract and store.
763 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 764 addr:$dst), 765 (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; 766 767 // Shuffle with MOVSD 768 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 769 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 770 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 771 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 772 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 773 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 774 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 775 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 776 777 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 778 // is during lowering, where it's not possible to recognize the fold cause 779 // it has two uses through a bitcast. One use disappears at isel time and the 780 // fold opportunity reappears. 781 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 782 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 783 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 784 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 785 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 786 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 787 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 788 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 789} 790 791//===----------------------------------------------------------------------===// 792// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions 793//===----------------------------------------------------------------------===// 794 795multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, 796 X86MemOperand x86memop, PatFrag ld_frag, 797 string asm, Domain d, 798 OpndItins itins, 799 bit IsReMaterializable = 1> { 800let neverHasSideEffects = 1 in 801 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 802 
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
             Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
           Sched<[WriteLoad]>;
}

// 128-bit VEX-encoded forms. Note the trailing 0 on VMOVUPD: it is marked
// not rematerializable (see IsReMaterializable parameter above).
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                                "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                                TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                                "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                                TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                                "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                                TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                                "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                                TB, OpSize, VEX;

// 256-bit VEX-encoded (VEX_L) forms.
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
                                 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                                 TB, VEX, VEX_L;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
                                 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                                 TB, OpSize, VEX, VEX_L;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                                 "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                                 TB, VEX, VEX_L;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                                 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                                 TB, OpSize, VEX, VEX_L;
// Legacy SSE-encoded forms.
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                               TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                               "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                               TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                               "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                               TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                               "movupd", SSEPackedDouble,
                               SSE_MOVU_ITINS, 0>,
                               TB, OpSize;

// Register-to-memory (store) forms, VEX-encoded.
let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
} // SchedRW

// For disassembler
// Reversed-operand (MRMDestReg) encodings; no patterns, codegen never picks
// them directly.
let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movaps\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}

// Zero-extend a 128-bit value into a zeroed 256-bit register: the VEX 128-bit
// move implicitly clears the upper lane, so a plain VMOVAPSrr suffices.
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
                  (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
                  (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
                  (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}


// Lower the 256-bit unaligned-store intrinsics onto the plain store forms.
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
          (VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
          (VMOVUPDYmr
                     addr:$dst, VR256:$src)>;

// Legacy SSE register-to-memory (store) forms.
let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}

// Lower the 128-bit unaligned-store intrinsics; AVX gets the VEX forms,
// pre-AVX targets fall back to the legacy encodings below.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;

// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing subvector extracts of lower 128-bits
  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                       "movapd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
}
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Shared body for MOVLPS/MOVLPD (and, via the wrapper below, MOVHPS/MOVHPD):
// a 64-bit memory operand merged into one half of $src1. psnode/pdnode are
// the single/double-precision DAG nodes to match.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
                                      string base_opc, string asm_opr,
                                      InstrItinClass itin> {
  def PSrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "s", asm_opr),
     [(set VR128:$dst,
       (psnode VR128:$src1,
              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
              itin, SSEPackedSingle>, TB,
     Sched<[WriteShuffleLd, ReadAfterLd]>;

  def PDrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "d", asm_opr),
     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))))],
              itin, SSEPackedDouble>, TB, OpSize,
     Sched<[WriteShuffleLd, ReadAfterLd]>;

}

// Instantiates both the VEX three-operand form (V#NAME) and the legacy
// two-operand tied form (NAME) from the base above.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
                                 string base_opc, InstrItinClass itin> {
  defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    itin>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                                         "\t{$src2, $dst|$dst, $src2}",
                                         itin>;
}

let AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
                                    IIC_SSE_MOV_LH>;
}

// Store forms: write the low 64 bits of the source register to memory.
let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                                 (iPTR 0))), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
                      (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                                      addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                              addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  // Store patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

// MOVH reuses the movlp/movhp multiclass; opcode 0x16 selects the high-half
// merge forms.
let AddedComplexity = 20 in {
  defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
                                    IIC_SSE_MOV_LH>;
}

let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // cause it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // MOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2] in {
  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // cause it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20, Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                      VEX_4V, Sched<[WriteShuffle]>;
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                      VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                     (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                     (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}

// Integer-typed variants of the shuffles lower to the same instructions.
let Predicates = [UseAVX] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

// NOTE(review): unlike every other CVT itinerary below, SSE_CVT_PD has no
// `let Sched =` override, so it inherits OpndItins' default Sched (WriteFAdd)
// -- confirm this is intentional.
def SSE_CVT_PD : OpndItins<
  IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;

let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
  IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;

let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
  IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;

let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
  IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;

// rr/rm pair for scalar conversions matched through an SDNode (fp_to_sint /
// sint_to_fp).
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                     string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
                        [(set DstRC:$dst, (OpNode SrcRC:$src))],
                        itins.rr>, Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
                        itins.rm>, Sched<[itins.Sched.Folded]>;
}

multiclass
           sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       X86MemOperand x86memop, string asm, Domain d,
                       OpndItins itins> {
let neverHasSideEffects = 1 in {
  def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
             [], itins.rr, d>, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
             [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}

// Three-operand VEX scalar int-to-fp conversion; no patterns (selection is
// done via the Pat<> records further down).
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1, Predicates = [UseAVX] in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[WriteCvtI2F]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}

let Predicates = [UseAVX] in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_32>,
                                XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_64>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_W, VEX_LIG;

// Accept explicit {l}/{q} size suffixes in assembly for the VEX forms.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
                                  XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
                                  XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
                                  XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
                                  XD, VEX_4V, VEX_W, VEX_LIG;

let Predicates = [UseAVX] in {
  def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
  def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;

  // sint_to_fp selection for the pattern-less VEX conversions above; the
  // first (tied) operand is a don't-care, hence IMPLICIT_DEF.
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (sint_to_fp GR64:$src)),
            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (sint_to_fp GR64:$src)),
            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

// Legacy SSE encodings of the scalar conversions.
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
                      "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
                      "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD, REX_W;

def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
(CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; 1568def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1569 (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>; 1570def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1571 (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; 1572def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1573 (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; 1574def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1575 (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; 1576def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1577 (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; 1578def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1579 (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; 1580 1581def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", 1582 (CVTSI2SSrm FR64:$dst, i32mem:$src)>; 1583def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", 1584 (CVTSI2SDrm FR64:$dst, i32mem:$src)>; 1585 1586// Conversion Instructions Intrinsics - Match intrinsics which expect MM 1587// and/or XMM operand(s). 

// Intrinsic-based scalar conversion: XMM (or memory via a complex pattern)
// source, GPR destination.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
                         string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
           Sched<[itins.Sched.Folded]>;
}

// Intrinsic-based GPR-to-XMM conversion; 2-address (tied $src1) for legacy
// SSE, 3-address for the VEX encodings (Is2Addr = 0).
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                    PatFrag ld_frag, string asm, OpndItins itins,
                    bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
              itins.rr>, Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
              itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
                  int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
                  int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                   sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;


let Predicates = [UseAVX] in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
          SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
          SSE_CVT_Scalar, 0>, XS, VEX_4V,
          VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
          SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
          SSE_CVT_Scalar, 0>, XD,
          VEX_4V, VEX_W;
}
let Constraints = "$src1 = $dst" in {
  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
                        "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
  defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse_cvtsi642ss, i64mem, loadi64,
                        "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                        "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
  defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                        "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
}

/// SSE 1 Only

// Aliases for intrinsics
let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                    "cvttss2si", SSE_CVT_SS2SI_64>,
                                    XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                    "cvttsd2si", SSE_CVT_SD2SI>,
                                    XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                    "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                    "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;

let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                                ssmem, sse_load_f32, "cvtss2si",
                                SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                ssmem, sse_load_f32, "cvtss2si",
                                SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                               ssmem, sse_load_f32, "cvtss2si",
                               SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 SSE_CVT_SS2SI_64>, XS, REX_W;

defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               TB, VEX, Requires<[HasAVX]>;
defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               TB, VEX, VEX_L, Requires<[HasAVX]>;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, SSE_CVT_PS>,
                            TB, Requires<[UseSSE2]>;

let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
}

def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
// Fix: pass the trailing 0 (suppress when printing) like every other alias
// in this group; without it this alias could be selected by the asm printer.
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

/// SSE 2 Only

// Convert scalar double to scalar single
let neverHasSideEffects = 1, Predicates =
[UseAVX] in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                       (ins FR64:$src1, FR64:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                       IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                       Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR64:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                     Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[UseAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))],
                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                    IIC_SSE_CVT_Scalar_RM>,
                    XD,
                    Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2F]>;
// Fix: the memory form must use MRMSrcMem (it takes an sdmem operand, the
// _RM itinerary and a folded-load sched class); it was declared MRMSrcReg,
// which encodes a register ModRM and would produce a wrong encoding.  The
// analogous Int_VCVTSS2SDrm below already uses MRMSrcMem.
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                      IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2F]>;
// Fix: memory form — MRMSrcMem, for the same reason as Int_VCVTSD2SSrm.
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                         VR128:$src1, sse_load_f64:$src2))],
                      IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RR>,
                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
                    Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RM>,
                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f64 (fextend FR32:$src)),
          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;

def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
          Requires<[UseAVX, OptForSpeed]>;

def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))],
                   IIC_SSE_CVT_Scalar_RR>, XS,
                 Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))],
                   IIC_SSE_CVT_Scalar_RM>, XS,
                 Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

// extload f32 -> f64.  This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;

// Intrinsic forms (whole-XMM operands); memory forms correctly use
// MRMSrcMem with ssmem sources.
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
                    Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
                    Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                     IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
                     IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
                      IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
                      IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                     IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
                     IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;


// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                       VEX, Sched<[WriteCvtF2I]>;

// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
                       Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
                       Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
                       VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}

def CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttps2dq VR128:$src))],
                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                            (loadv4f32 addr:$src)))],
                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
                                             (loadv8f32 addr:$src)))],
                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
                          Sched<[WriteCvtF2ILd]>;

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;

// Select the AVX/SSE2 packed conversions for generic nodes and the
// cvtdq2ps intrinsic.
let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (VCVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQrm addr:$src)>;

  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
            (VCVTDQ2PSYrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
            (VCVTTPS2DQYrr VR256:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (CVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
            (CVTTPS2DQrm addr:$src)>;
}

def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (int_x86_sse2_cvttpd2dq VR128:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.

// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                            (loadv2f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>,
                      Sched<[WriteCvtF2ILd]>;

// Convert packed single to packed double
let Predicates = [HasAVX] in {
  // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                    IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                    IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}

let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                   IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                   IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
                        Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256
                           (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2F]>;
}

let neverHasSideEffects = 1, mayLoad = 1 in
// Fix: the rr/rm itineraries were swapped here — the memory form carried
// IIC_SSE_CVT_PD_RR and the register form IIC_SSE_CVT_PD_RM, unlike every
// neighbouring rr/rm pair (CVTPD2DQ, CVTTPD2DQ, CVTPS2PD).  Put the _RM
// itinerary on the load form and _RR on the register form.
def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;

// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr  : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;

// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;

// Legacy SSE2 (non-VEX) forms.
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;


// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
// Selection patterns mapping generic rounding/extension nodes (and the 256-bit
// cvtdq2ps intrinsic) onto the AVX conversion instructions defined above.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
            (VCVTDQ2PSYrm addr:$src)>;

  // Match fround and fextend for 128/256-bit conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (VCVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PSXrm addr:$src)>;
  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (VCVTPS2PDrr VR128:$src)>;
  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
            (VCVTPS2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  // Match fround and fextend for 128 conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (CVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
            (CVTPD2PSrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (CVTPS2PDrr VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt,
                            OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                itins.rr>, Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), imm:$cc))],
                itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // These *_alt forms have empty pattern lists: they exist only for the
  // assembler/disassembler, taking the condition code as a raw i8imm.
  let neverHasSideEffects = 1 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RM>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>,
                 XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>, // same latency as 32 bit compare
                 XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
                  XS;
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSE_ALU_F64S>,
                  XD;
}

// Intrinsic forms: operate on full VR128 operands (the intrinsic semantics
// preserve the upper elements of $src1) instead of FR32/FR64 scalars.
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                                Intrinsic Int, string asm, OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       VR128:$src, imm:$cc))],
                itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       (load addr:$src), imm:$cc))],
                itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Aliases to match intrinsics which expect XMM operand(s).
defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S>,
                       XS, VEX_4V;
defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S>, // same latency as f32
                       XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
  defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
                       "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                       SSE_ALU_F32S>, XS;
  defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
                       "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                       SSE_ALU_F64S>,
                       XD;
}


// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
// Both forms produce no register result (empty outs); they only set EFLAGS.
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr> {
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
             IIC_SSE_COMIS_RR>,
             Sched<[WriteFAdd]>;
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))],
             IIC_SSE_COMIS_RM>,
             Sched<[WriteFAddLd, ReadAfterLd]>;
}

let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, TB, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd">, TB, OpSize, VEX, VEX_LIG;
  // COMISS/COMISD have no selection patterns here (Pattern = []); they are
  // defined for their encodings only.
  let Pattern = []<dag> in {
    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                  "comiss">, TB, VEX, VEX_LIG;
    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                  "comisd">, TB, OpSize, VEX, VEX_LIG;
  }

  defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                    load, "ucomiss">, TB, VEX;
  defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                    load, "ucomisd">, TB, OpSize, VEX;

  defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                                   load, "comiss">, TB, VEX;
  defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                                   load, "comisd">, TB, OpSize, VEX;
  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, TB;
  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd">, TB, OpSize;

  let Pattern = []<dag> in {
    defm COMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                 "comiss">, TB;
    defm COMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                 "comisd">, TB, OpSize;
  }

  defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                   load, "ucomiss">, TB;
  defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                   load, "ucomisd">, TB, OpSize;

  defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
                                  "comiss">, TB;
  defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
                                  "comisd">, TB, OpSize;
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, Intrinsic Int, string asm,
                            string asm_alt, Domain d,
                            OpndItins itins = SSE_ALU_F32P> {
  def rri : PIi8<0xC2, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
             itins.rr, d>,
            Sched<[WriteFAdd]>;
  def rmi : PIi8<0xC2, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
             itins.rm, d>,
            Sched<[WriteFAddLd, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let neverHasSideEffects = 1 in {
    def rri_alt : PIi8<0xC2, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
    def rmi_alt : PIi8<0xC2, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], itins.rm, d>,
               Sched<[WriteFAddLd, ReadAfterLd]>;
  }
}

defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, TB, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedSingle, SSE_ALU_F32P>, TB;
  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize;
}

// Map the generic X86cmpp node (which produces an integer mask vector) onto
// the packed compare instructions above.
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         Domain d, bit IsConvertibleToThreeAddress = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
                   Sched<[WriteShuffleLd, ReadAfterLd]>;
  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
                   Sched<[WriteShuffle]>;
}

defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
                    TB;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
                    TB, OpSize;
}

// Also select the FP shuffle instructions for integer-typed X86Shufp nodes.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                    (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;

  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                    (loadv2i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;

  // 256-bit patterns
  def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v8i32 (X86Shufp VR256:$src1,
                      (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v4i64 (X86Shufp VR256:$src1,
                      (loadv4i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

let Predicates = [UseSSE2] in {
  // Generic SHUFPD patterns
  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                    (memopv2i64 addr:$src2), (i8 imm:$imm))),
            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   Domain d> {
    def rr : PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))],
                           IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
    def rm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1,
                                       (mem_frag addr:$src2))))],
                                       IIC_SSE_UNPCK, d>,
             Sched<[WriteShuffleLd, ReadAfterLd]>;
}

defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, TB, VEX_4V;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, TB, OpSize, VEX_4V;

defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, TB;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, TB, OpSize;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, TB;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, TB, OpSize;
} // Constraints = "$src1 = $dst"

// On AVX1 (no AVX2) the FP unpacks are also used for 256-bit integer unpacks.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [HasAVX] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold cause it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold cause it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - sse 1 & 2 extract the sign mask of packed FP values
/// into a general-purpose register (MOVMSKPS/MOVMSKPD).
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
              Sched<[WriteVecLogic]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
                                        "movmskps", SSEPackedSingle>, TB, VEX;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                        "movmskpd", SSEPackedDouble>, TB,
                                        OpSize, VEX;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
                                         "movmskps", SSEPackedSingle>, TB,
                                         VEX, VEX_L;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                         "movmskpd", SSEPackedDouble>, TB,
                                         OpSize, VEX, VEX_L;

  def : Pat<(i32 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR32:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
  def : Pat<(i32 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR64:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
                                     SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
                                     SSEPackedDouble>, TB, OpSize;

def : Pat<(i32 (X86fgetsign FR32:$src)),
          (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>,
      Requires<[UseSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
          (SUBREG_TO_REG (i64 0),
           (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>,
      Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>,
      Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
          (SUBREG_TO_REG (i64 0),
           (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>,
      Requires<[UseSSE2]>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, OpndItins itins,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                     (bitconvert (memop_frag addr:$src2)))))],
                                     itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt

// PDI_binop_all - Instantiates the three encodings of a packed-integer binop:
// VEX 128-bit (V#NAME, HasAVX), legacy two-address 128-bit (NAME), and
// VEX 256-bit (V#NAME#Y, HasAVX2).
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         OpndItins itins, bit IsCommutable = 0> {
let Predicates = [HasAVX] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                             VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memopv2i64, i128mem, itins, IsCommutable, 1>;

let Predicates = [HasAVX2] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, loadv4i64, i256mem, itins,
                               IsCommutable, 0>, VEX_4V, VEX_L;
}

// These are ordered here for pattern ordering requirements with the fp versions

defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SSE_BIT_ITINS_P, 0>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
/// These operate on FR32/FR64 scalar registers using the packed encodings.
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, OpndItins itins> {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
              TB, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
        TB, OpSize, VEX_4V;

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
                f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
                TB;

    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
                f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
                TB, OpSize;
  }
}

// Alias bitwise logical operations using SSE logical ops on packed FP values.
let isCodeGenOnly = 1 in {
  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
                SSE_BIT_ITINS_P>;
  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for,
                SSE_BIT_ITINS_P>;
  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
                SSE_BIT_ITINS_P>;

  let isCommutable = 0 in
    defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
                  SSE_BIT_ITINS_P>;
}

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem,
        [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
        [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
                           (loadv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;

  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f256mem,
        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                  (bc_v4i64 (v4f64 VR256:$src2))))],
        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                  (loadv4i64 addr:$src2)))], 0>,
                                  TB, OpSize, VEX_4V, VEX_L;

  // In AVX no need to add a pattern for 128-bit logical rr ps, because they
  // are all promoted to v2i64, and the patterns are covered by the int
  // version. This is needed in SSE only, because v2i64 isn't supported on
  // SSE1, but only on SSE2.
  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
       !strconcat(OpcodeStr, "ps"), f128mem, [],
       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                 (loadv2i64 addr:$src2)))], 0>, TB, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
       !strconcat(OpcodeStr, "pd"), f128mem,
       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                 (bc_v2i64 (v2f64 VR128:$src2))))],
       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                 (loadv2i64 addr:$src2)))], 0>,
                                                 TB, OpSize, VEX_4V;

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
         !strconcat(OpcodeStr, "ps"), f128mem,
         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                   (memopv2i64 addr:$src2)))]>, TB;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
         !strconcat(OpcodeStr, "pd"), f128mem,
         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                   (bc_v2i64 (v2f64 VR128:$src2))))],
         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                   (memopv2i64 addr:$src2)))]>, TB, OpSize;
  }
}

defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, SizeItins itins> {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                               VR128, v4f32, f128mem, loadv4f32,
                               SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                               VR128, v2f64, f128mem, loadv2f64,
                               SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;

  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                        OpNode, VR256, v8f32, f256mem, loadv8f32,
                        SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                        OpNode, VR256, v4f64, f256mem, loadv4f64,
                        SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
                              itins.s>, TB;
    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
                              itins.d>, TB, OpSize;
  }
}

multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  SizeItins itins> {
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                         OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                         OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, itins.s>, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, itins.d>, XD;
  }
}

multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SizeItins itins> {
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
                   itins.s, 0>, XS, VEX_4V, VEX_LIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
                   !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
                   itins.d, 0>, XD, VEX_4V, VEX_LIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
              !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
              itins.s>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
              !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
              itins.d>, XD;
  }
}

// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
           basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
           basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
           basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}

// Commutative min/max variants (X86fmaxc/X86fminc), codegen-only: they reuse
// the same opcodes as MAX/MIN above.
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

let Sched = WriteFSqrt in {
def SSE_SQRTPS : OpndItins<
  IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM
>;

def SSE_SQRTSS : OpndItins<
  IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM
>;

def SSE_SQRTPD : OpndItins<
  IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM
>;

def SSE_SQRTSD : OpndItins<
  IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM
>;
}

let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;

def SSE_RCPS : OpndItins<
  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
}

/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
// AVX forms carry empty patterns; they are selected via the explicit
// Pat<> rules further down in the file (hence hasSideEffects = 0 here).
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1,f32mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
  // Intrinsic (whole-XMM) forms, matched directly from F32Int.
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
                Sched<[itins.Sched]>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
                Sched<[itins.Sched.Folded]>;
}

/// sse1_fp_unop_rw - SSE1 unops where the vector (intrinsic) form has a
/// read-write operand: the SSE *_Int defs below tie $src1 to $dst.
multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins> {
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1,f32mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
  // Read-write intrinsic forms: $src1 is tied to $dst, patterns are
  // supplied by the UseSSE1 Pat<> rules later in the file.
  let Constraints = "$src1 = $dst" in {
    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rr>, Sched<[itins.Sched]>;
    let mayLoad = 1, hasSideEffects = 0 in
    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
// AVX: 128-bit and 256-bit ("Y") packed-single forms, matched from OpNode.
let Predicates = [HasAVX] in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
                       itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  // Legacy SSE forms; note the memory form requires an aligned memop.
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
            Sched<[itins.Sched]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
            Sched<[itins.Sched.Folded]>;
}

/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V4F32Int, Intrinsic V8F32Int,
                              OpndItins itins> {
// AVX: 128-bit intrinsic form plus the 256-bit ("Y") form using V8F32Int.
let Predicates = [HasAVX] in {
  def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                           !strconcat("v", OpcodeStr,
                                      "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR128:$dst, (V4F32Int VR128:$src))],
                           itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                           !strconcat("v", OpcodeStr,
                                      "ps\t{$src, $dst|$dst, $src}"),
                           [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
                           itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                            !strconcat("v", OpcodeStr,
                                       "ps\t{$src, $dst|$dst, $src}"),
                            [(set VR256:$dst, (V8F32Int VR256:$src))],
                            itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
                            (ins f256mem:$src),
                            !strconcat("v", OpcodeStr,
                                       "ps\t{$src, $dst|$dst, $src}"),
                            [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
                            itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  // Legacy SSE intrinsic forms (aligned memop for the memory variant).
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))],
                    itins.rr>, Sched<[itins.Sched]>;
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
                    itins.rm>, Sched<[itins.Sched.Folded]>;
}

/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
// AVX forms carry empty patterns; selection happens via Pat<> rules below.
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
                      (ins FR64:$src1, FR64:$src2),
                      !strconcat("v", OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
                      (ins FR64:$src1,f64mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, sdmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
            Sched<[itins.Sched]>;
  // See the comments in sse1_fp_unop_s for why this is OptForSize.
  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
              [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
            Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
                Sched<[itins.Sched]>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
                Sched<[itins.Sched.Folded]>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, OpndItins itins> {
let Predicates = [HasAVX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
                       itins.rr>, VEX, Sched<[itins.Sched]>;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
                       itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
                        itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
                        itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}

  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
            Sched<[itins.Sched]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
            Sched<[itins.Sched.Folded]>;
}

// Square root.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
                            SSE_SQRTSS>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
                            SSE_SQRTSD>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                                int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
defm RCP   : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
                                int_x86_avx_rcp_ps_256, SSE_RCPP>;

// Selection patterns for the pattern-less AVX scalar forms above; the
// IMPLICIT_DEF feeds the (ignored) upper elements of the destination.
// NOTE(review): the trailing Requires<[HasAVX]> clauses sit inside a
// "let Predicates = [UseAVX]" block — the outer let appears to override
// them, making them redundant; confirm before relying on either predicate.
let Predicates = [UseAVX] in {
  def : Pat<(f32 (fsqrt FR32:$src)),
            (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;
  def : Pat<(f64 (fsqrt FR64:$src)),
            (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;

  def : Pat<(f32 (X86frsqrt FR32:$src)),
            (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;

  def : Pat<(f32 (X86frcp FR32:$src)),
            (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[HasAVX, OptForSize]>;
}
let Predicates = [UseAVX] in {
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR64)),
                              VR128)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
                                       (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
// SSE1 (non-AVX) selection for the read-write rsqrt/rcp intrinsic forms:
// the same register supplies both the tied destination and the source.
let Predicates = [UseSSE1] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (RCPSSr_Int VR128:$src, VR128:$src)>;
}

// There is no f64 version of the reciprocal approximation instructions.

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntps\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v4f32 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntpd\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2f64 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;

let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntdq\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2i64 VR128:$src),
                                                addr:$dst)],
                      IIC_SSE_MOVNT>, VEX;

def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntps\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v8f32 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntpd\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4f64 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntdq\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4i64 VR256:$src),
                                                 addr:$dst)],
                       IIC_SSE_MOVNT>, VEX, VEX_L;

def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;

let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
                    IIC_SSE_MOVNT>;

// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)],
                 IIC_SSE_MOVNT>,
               TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)],
                     IIC_SSE_MOVNT>,
                  TB, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]

def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
          (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;

def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
// The third operand of the generic prefetch node is the locality hint:
// 3 -> prefetcht0 (closest) down to 0 -> prefetchnta (non-temporal).
let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
    IIC_SSE_PREFETCH>, TB;
}

// FIXME: How should these memory instructions be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
               IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;

// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;

// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
               "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
             TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
               "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
             TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
               "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
             TB, Requires<[HasSSE2]>;
} // SchedRW

def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//

def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                    IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                    IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;

def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                  IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                  IIC_SSE_STMXCSR>, Sched<[WriteStore]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

// NOTE(review): this section mixes the legacy "neverHasSideEffects = 1"
// spelling with "hasSideEffects = 0"; they should be unified — confirm
// which spelling the rest of the tree uses before changing.
let ExeDomain = SSEPackedInt in { // SSE integer instructions

let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX, VEX_L;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX, VEX_L;
}

// For Disassembler: "_REV" forms use the store-direction opcode (0x7F)
// with a register destination, so both encodings disassemble.
let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>,
                          VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>,
                          VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}

let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                      VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                      VEX, VEX_L;
let Predicates = [HasAVX] in {
  def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
                     XS, VEX;
  def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
                     XS, VEX, VEX_L;
}
}

let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
                      VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
                      VEX, VEX_L;
let Predicates = [HasAVX] in {
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
                   XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
                   XS, VEX, VEX_L;
}
}

let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;

// For Disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_RR>;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}",
                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
} // SchedRW

let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
                   IIC_SSE_MOVA_P_RM>;
def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
                   IIC_SSE_MOVU_P_RM>,
                 XS, Requires<[UseSSE2]>;
}

let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
                   IIC_SSE_MOVA_P_MR>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/],
                   IIC_SSE_MOVU_P_MR>,
                 XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
            (VMOVDQUYmr addr:$dst, VR256:$src)>;
}
let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
          (MOVDQUmr addr:$dst, VR128:$src)>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
  IIC_SSE_PMADD, IIC_SSE_PMADD
>;

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm_int - Intrinsic-based packed integer binary op; Is2Addr
/// selects the destructive SSE assembly string vs. the 3-operand AVX one.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            RegisterClass RC, PatFrag memop_frag,
                            X86MemOperand x86memop,
                            OpndItins itins,
                            bit IsCommutable = 0,
                            bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
      Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
       itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// PDI_binop_all_int - Instantiates the 128-bit SSE/AVX forms and the
/// 256-bit AVX2 form of an intrinsic-based packed integer op.
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
                             Intrinsic IntId256, OpndItins itins,
                             bit IsCommutable = 0> {
let Predicates = [HasAVX] in
  defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
                                 VR128, loadv2i64, i128mem, itins,
                                 IsCommutable, 0>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
                               i128mem, itins, IsCommutable, 1>;

let Predicates = [HasAVX2] in
  defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
                                   VR256, loadv4i64, i256mem, itins,
                                   IsCommutable, 0>, VEX_4V, VEX_L;
}

/// PDI_binop_rmi - Packed shifts: reg/reg and reg/mem forms (count in an
/// XMM register, OpNode) plus an immediate-count form (opc2, OpNode2).
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
                         ShiftOpndItins itins,
                         bit Is2Addr = 1> {
  // src2 is always 128-bit
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
       itins.rr>, Sched<[WriteVecShift]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode RC:$src1,
                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
       Sched<[WriteVecShiftLd, ReadAfterLd]>;
  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
       (ins RC:$src1, i8imm:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>,
       Sched<[WriteVecShift]>;
}

/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins,
                         bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
       Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                     (bitconvert (memop_frag addr:$src2)))))]>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt

defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 1>;
defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 0>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PMINUB  : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMINSW  : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;

// Intrinsic forms
defm PSUBSB  : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
                                 int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
defm PSUBSW  : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
                                 int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
defm PADDSB  : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
                                 int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
defm PADDSW  : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
                                 int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
                                 int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
defm PADDUSW
: PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 3823 int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; 3824defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 3825 int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; 3826defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 3827 int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; 3828defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 3829 int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; 3830defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 3831 int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; 3832defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 3833 int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; 3834defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 3835 int_x86_avx2_psad_bw, SSE_PMADD, 1>; 3836 3837let Predicates = [HasAVX] in 3838defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, 3839 loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, 3840 VEX_4V; 3841let Predicates = [HasAVX2] in 3842defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, 3843 VR256, loadv4i64, i256mem, 3844 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3845let Constraints = "$src1 = $dst" in 3846defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, 3847 memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; 3848 3849//===---------------------------------------------------------------------===// 3850// SSE2 - Packed Integer Logical Instructions 3851//===---------------------------------------------------------------------===// 3852 3853let Predicates = [HasAVX] in { 3854defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 3855 VR128, v8i16, v8i16, bc_v8i16, 3856 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3857defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, 3858 VR128, v4i32, v4i32, bc_v4i32, 3859 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3860defm VPSLLQ : 
PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 3861 VR128, v2i64, v2i64, bc_v2i64, 3862 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3863 3864defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 3865 VR128, v8i16, v8i16, bc_v8i16, 3866 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3867defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 3868 VR128, v4i32, v4i32, bc_v4i32, 3869 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3870defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 3871 VR128, v2i64, v2i64, bc_v2i64, 3872 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3873 3874defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 3875 VR128, v8i16, v8i16, bc_v8i16, 3876 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3877defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 3878 VR128, v4i32, v4i32, bc_v4i32, 3879 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3880 3881let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 3882 // 128-bit logical shifts. 3883 def VPSLLDQri : PDIi8<0x73, MRM7r, 3884 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 3885 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3886 [(set VR128:$dst, 3887 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>, 3888 VEX_4V; 3889 def VPSRLDQri : PDIi8<0x73, MRM3r, 3890 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 3891 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3892 [(set VR128:$dst, 3893 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>, 3894 VEX_4V; 3895 // PSRADQri doesn't exist in SSE[1-3]. 
3896} 3897} // Predicates = [HasAVX] 3898 3899let Predicates = [HasAVX2] in { 3900defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 3901 VR256, v16i16, v8i16, bc_v8i16, 3902 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3903defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, 3904 VR256, v8i32, v4i32, bc_v4i32, 3905 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3906defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 3907 VR256, v4i64, v2i64, bc_v2i64, 3908 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3909 3910defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 3911 VR256, v16i16, v8i16, bc_v8i16, 3912 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3913defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 3914 VR256, v8i32, v4i32, bc_v4i32, 3915 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3916defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 3917 VR256, v4i64, v2i64, bc_v2i64, 3918 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3919 3920defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 3921 VR256, v16i16, v8i16, bc_v8i16, 3922 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3923defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 3924 VR256, v8i32, v4i32, bc_v4i32, 3925 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 3926 3927let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 3928 // 256-bit logical shifts. 
3929 def VPSLLDQYri : PDIi8<0x73, MRM7r, 3930 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 3931 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3932 [(set VR256:$dst, 3933 (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>, 3934 VEX_4V, VEX_L; 3935 def VPSRLDQYri : PDIi8<0x73, MRM3r, 3936 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 3937 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3938 [(set VR256:$dst, 3939 (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>, 3940 VEX_4V, VEX_L; 3941 // PSRADQYri doesn't exist in SSE[1-3]. 3942} 3943} // Predicates = [HasAVX2] 3944 3945let Constraints = "$src1 = $dst" in { 3946defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 3947 VR128, v8i16, v8i16, bc_v8i16, 3948 SSE_INTSHIFT_ITINS_P>; 3949defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 3950 VR128, v4i32, v4i32, bc_v4i32, 3951 SSE_INTSHIFT_ITINS_P>; 3952defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 3953 VR128, v2i64, v2i64, bc_v2i64, 3954 SSE_INTSHIFT_ITINS_P>; 3955 3956defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 3957 VR128, v8i16, v8i16, bc_v8i16, 3958 SSE_INTSHIFT_ITINS_P>; 3959defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 3960 VR128, v4i32, v4i32, bc_v4i32, 3961 SSE_INTSHIFT_ITINS_P>; 3962defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 3963 VR128, v2i64, v2i64, bc_v2i64, 3964 SSE_INTSHIFT_ITINS_P>; 3965 3966defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 3967 VR128, v8i16, v8i16, bc_v8i16, 3968 SSE_INTSHIFT_ITINS_P>; 3969defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 3970 VR128, v4i32, v4i32, bc_v4i32, 3971 SSE_INTSHIFT_ITINS_P>; 3972 3973let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { 3974 // 128-bit logical shifts. 
3975 def PSLLDQri : PDIi8<0x73, MRM7r, 3976 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 3977 "pslldq\t{$src2, $dst|$dst, $src2}", 3978 [(set VR128:$dst, 3979 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))], 3980 IIC_SSE_INTSHDQ_P_RI>; 3981 def PSRLDQri : PDIi8<0x73, MRM3r, 3982 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 3983 "psrldq\t{$src2, $dst|$dst, $src2}", 3984 [(set VR128:$dst, 3985 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))], 3986 IIC_SSE_INTSHDQ_P_RI>; 3987 // PSRADQri doesn't exist in SSE[1-3]. 3988} 3989} // Constraints = "$src1 = $dst" 3990 3991let Predicates = [HasAVX] in { 3992 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 3993 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 3994 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 3995 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 3996 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 3997 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 3998 3999 // Shift up / down and insert zero's. 4000 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4001 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4002 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4003 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4004} 4005 4006let Predicates = [HasAVX2] in { 4007 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), 4008 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4009 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), 4010 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4011} 4012 4013let Predicates = [UseSSE2] in { 4014 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4015 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4016 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4017 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4018 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4019 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4020 4021 // Shift up / down and insert zero's. 
4022 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4023 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4024 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4025 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4026} 4027 4028//===---------------------------------------------------------------------===// 4029// SSE2 - Packed Integer Comparison Instructions 4030//===---------------------------------------------------------------------===// 4031 4032defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 4033 SSE_INTALU_ITINS_P, 1>; 4034defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 4035 SSE_INTALU_ITINS_P, 1>; 4036defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 4037 SSE_INTALU_ITINS_P, 1>; 4038defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 4039 SSE_INTALU_ITINS_P, 0>; 4040defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 4041 SSE_INTALU_ITINS_P, 0>; 4042defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 4043 SSE_INTALU_ITINS_P, 0>; 4044 4045//===---------------------------------------------------------------------===// 4046// SSE2 - Packed Integer Pack Instructions 4047//===---------------------------------------------------------------------===// 4048 4049defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, 4050 int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>; 4051defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, 4052 int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>; 4053defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, 4054 int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>; 4055 4056//===---------------------------------------------------------------------===// 4057// SSE2 - Packed Integer Shuffle Instructions 4058//===---------------------------------------------------------------------===// 4059 4060let ExeDomain = SSEPackedInt in { 4061multiclass 
sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 4062 SDNode OpNode> { 4063let Predicates = [HasAVX] in { 4064 def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 4065 (ins VR128:$src1, i8imm:$src2), 4066 !strconcat("v", OpcodeStr, 4067 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4068 [(set VR128:$dst, 4069 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4070 IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; 4071 def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 4072 (ins i128mem:$src1, i8imm:$src2), 4073 !strconcat("v", OpcodeStr, 4074 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4075 [(set VR128:$dst, 4076 (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), 4077 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, 4078 Sched<[WriteShuffleLd]>; 4079} 4080 4081let Predicates = [HasAVX2] in { 4082 def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 4083 (ins VR256:$src1, i8imm:$src2), 4084 !strconcat("v", OpcodeStr, 4085 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4086 [(set VR256:$dst, 4087 (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], 4088 IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; 4089 def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 4090 (ins i256mem:$src1, i8imm:$src2), 4091 !strconcat("v", OpcodeStr, 4092 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4093 [(set VR256:$dst, 4094 (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), 4095 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, 4096 Sched<[WriteShuffleLd]>; 4097} 4098 4099let Predicates = [UseSSE2] in { 4100 def ri : Ii8<0x70, MRMSrcReg, 4101 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), 4102 !strconcat(OpcodeStr, 4103 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4104 [(set VR128:$dst, 4105 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4106 IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; 4107 def mi : Ii8<0x70, MRMSrcMem, 4108 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), 4109 !strconcat(OpcodeStr, 4110 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 
4111 [(set VR128:$dst, 4112 (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), 4113 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, 4114 Sched<[WriteShuffleLd]>; 4115} 4116} 4117} // ExeDomain = SSEPackedInt 4118 4119defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize; 4120defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; 4121defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; 4122 4123let Predicates = [HasAVX] in { 4124 def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))), 4125 (VPSHUFDmi addr:$src1, imm:$imm)>; 4126 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4127 (VPSHUFDri VR128:$src1, imm:$imm)>; 4128} 4129 4130let Predicates = [UseSSE2] in { 4131 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), 4132 (PSHUFDmi addr:$src1, imm:$imm)>; 4133 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4134 (PSHUFDri VR128:$src1, imm:$imm)>; 4135} 4136 4137//===---------------------------------------------------------------------===// 4138// SSE2 - Packed Integer Unpack Instructions 4139//===---------------------------------------------------------------------===// 4140 4141let ExeDomain = SSEPackedInt in { 4142multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 4143 SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { 4144 def rr : PDI<opc, MRMSrcReg, 4145 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 4146 !if(Is2Addr, 4147 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4148 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4149 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], 4150 IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; 4151 def rm : PDI<opc, MRMSrcMem, 4152 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4153 !if(Is2Addr, 4154 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4155 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4156 [(set VR128:$dst, (OpNode VR128:$src1, 4157 
(bc_frag (memopv2i64 4158 addr:$src2))))], 4159 IIC_SSE_UNPCK>, 4160 Sched<[WriteShuffleLd, ReadAfterLd]>; 4161} 4162 4163multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, 4164 SDNode OpNode, PatFrag bc_frag> { 4165 def Yrr : PDI<opc, MRMSrcReg, 4166 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4167 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4168 [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, 4169 Sched<[WriteShuffle]>; 4170 def Yrm : PDI<opc, MRMSrcMem, 4171 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4172 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4173 [(set VR256:$dst, (OpNode VR256:$src1, 4174 (bc_frag (memopv4i64 addr:$src2))))]>, 4175 Sched<[WriteShuffleLd, ReadAfterLd]>; 4176} 4177 4178let Predicates = [HasAVX] in { 4179 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, 4180 bc_v16i8, 0>, VEX_4V; 4181 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, 4182 bc_v8i16, 0>, VEX_4V; 4183 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, 4184 bc_v4i32, 0>, VEX_4V; 4185 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, 4186 bc_v2i64, 0>, VEX_4V; 4187 4188 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, 4189 bc_v16i8, 0>, VEX_4V; 4190 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, 4191 bc_v8i16, 0>, VEX_4V; 4192 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, 4193 bc_v4i32, 0>, VEX_4V; 4194 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, 4195 bc_v2i64, 0>, VEX_4V; 4196} 4197 4198let Predicates = [HasAVX2] in { 4199 defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, 4200 bc_v32i8>, VEX_4V, VEX_L; 4201 defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, 4202 bc_v16i16>, VEX_4V, VEX_L; 4203 defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, 4204 bc_v8i32>, VEX_4V, 
VEX_L; 4205 defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, 4206 bc_v4i64>, VEX_4V, VEX_L; 4207 4208 defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, 4209 bc_v32i8>, VEX_4V, VEX_L; 4210 defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 4211 bc_v16i16>, VEX_4V, VEX_L; 4212 defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, 4213 bc_v8i32>, VEX_4V, VEX_L; 4214 defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, 4215 bc_v4i64>, VEX_4V, VEX_L; 4216} 4217 4218let Constraints = "$src1 = $dst" in { 4219 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, 4220 bc_v16i8>; 4221 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, 4222 bc_v8i16>; 4223 defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, 4224 bc_v4i32>; 4225 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, 4226 bc_v2i64>; 4227 4228 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, 4229 bc_v16i8>; 4230 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, 4231 bc_v8i16>; 4232 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, 4233 bc_v4i32>; 4234 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, 4235 bc_v2i64>; 4236} 4237} // ExeDomain = SSEPackedInt 4238 4239//===---------------------------------------------------------------------===// 4240// SSE2 - Packed Integer Extract and Insert 4241//===---------------------------------------------------------------------===// 4242 4243let ExeDomain = SSEPackedInt in { 4244multiclass sse2_pinsrw<bit Is2Addr = 1> { 4245 def rri : Ii8<0xC4, MRMSrcReg, 4246 (outs VR128:$dst), (ins VR128:$src1, 4247 GR32orGR64:$src2, i32i8imm:$src3), 4248 !if(Is2Addr, 4249 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4250 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4251 [(set VR128:$dst, 4252 (X86pinsrw VR128:$src1, GR32orGR64:$src2, 
imm:$src3))], 4253 IIC_SSE_PINSRW>, Sched<[WriteShuffle]>; 4254 def rmi : Ii8<0xC4, MRMSrcMem, 4255 (outs VR128:$dst), (ins VR128:$src1, 4256 i16mem:$src2, i32i8imm:$src3), 4257 !if(Is2Addr, 4258 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4259 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4260 [(set VR128:$dst, 4261 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 4262 imm:$src3))], IIC_SSE_PINSRW>, 4263 Sched<[WriteShuffleLd, ReadAfterLd]>; 4264} 4265 4266// Extract 4267let Predicates = [HasAVX] in 4268def VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4269 (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4270 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4271 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4272 imm:$src2))]>, TB, OpSize, VEX, 4273 Sched<[WriteShuffle]>; 4274def PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4275 (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4276 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4277 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4278 imm:$src2))], IIC_SSE_PEXTRW>, 4279 Sched<[WriteShuffleLd, ReadAfterLd]>; 4280 4281// Insert 4282let Predicates = [HasAVX] in 4283defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; 4284 4285let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 4286defm PINSRW : sse2_pinsrw, TB, OpSize; 4287 4288} // ExeDomain = SSEPackedInt 4289 4290//===---------------------------------------------------------------------===// 4291// SSE2 - Packed Mask Creation 4292//===---------------------------------------------------------------------===// 4293 4294let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { 4295 4296def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4297 (ins VR128:$src), 4298 "pmovmskb\t{$src, $dst|$dst, $src}", 4299 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4300 IIC_SSE_MOVMSK>, VEX; 4301 4302let Predicates = [HasAVX2] in { 4303def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs 
GR32orGR64:$dst), 4304 (ins VR256:$src), 4305 "pmovmskb\t{$src, $dst|$dst, $src}", 4306 [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, 4307 VEX, VEX_L; 4308} 4309 4310def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), 4311 "pmovmskb\t{$src, $dst|$dst, $src}", 4312 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4313 IIC_SSE_MOVMSK>; 4314 4315} // ExeDomain = SSEPackedInt 4316 4317//===---------------------------------------------------------------------===// 4318// SSE2 - Conditional Store 4319//===---------------------------------------------------------------------===// 4320 4321let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { 4322 4323let Uses = [EDI], Predicates = [HasAVX,In32BitMode] in 4324def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4325 (ins VR128:$src, VR128:$mask), 4326 "maskmovdqu\t{$mask, $src|$src, $mask}", 4327 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4328 IIC_SSE_MASKMOV>, VEX; 4329let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in 4330def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4331 (ins VR128:$src, VR128:$mask), 4332 "maskmovdqu\t{$mask, $src|$src, $mask}", 4333 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4334 IIC_SSE_MASKMOV>, VEX; 4335 4336let Uses = [EDI], Predicates = [UseSSE2,In32BitMode] in 4337def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4338 "maskmovdqu\t{$mask, $src|$src, $mask}", 4339 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4340 IIC_SSE_MASKMOV>; 4341let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in 4342def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4343 "maskmovdqu\t{$mask, $src|$src, $mask}", 4344 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4345 IIC_SSE_MASKMOV>; 4346 4347} // ExeDomain = SSEPackedInt 4348 4349//===---------------------------------------------------------------------===// 4350// SSE2 - Move Doubleword 
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
//
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                        VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                        IIC_SSE_MOVDQ>,
                      VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))],
                          IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))],
                       IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;

def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                      Sched<[WriteMove]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                      IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))],
                          IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "mov{d|q}\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))],
                       IIC_SSE_MOVDQ>, Sched<[WriteMove]>;

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
let isCodeGenOnly = 1 in {
  def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (bitconvert GR32:$src))],
                        IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;

  def VMOVDI2SSrm  : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
                        IIC_SSE_MOVDQ>,
                        VEX, Sched<[WriteLoad]>;
  def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (bitconvert GR32:$src))],
                        IIC_SSE_MOVDQ>, Sched<[WriteMove]>;

  def MOVDI2SSrm  : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
                        IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
}

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
//
def VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
                      Sched<[WriteMove]>;
// Note: this is a store (MRMDestMem, store pattern), so it is scheduled as
// WriteStore, matching the other *mr defs in this section.
def VMOVPDI2DImr  : VS2I<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (vector_extract (v4i32 VR128:$src),
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
                                     VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                   Sched<[WriteMove]>;
// Store form: scheduled WriteStore (was WriteLoad, which mismodeled it).
def MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (vector_extract (v4i32 VR128:$src),
                                     (iPTR 0))), addr:$dst)],
                                     IIC_SSE_MOVDQ>, Sched<[WriteStore]>;

def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;

def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
        (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;

def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
        (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;

def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
        (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                                           (iPTR 0)))],
                                                           IIC_SSE_MOVD_ToGP>,
                      VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                                         (iPTR 0)))],
                                                         IIC_SSE_MOVD_ToGP>;
} //SchedRW

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
let isCodeGenOnly = 1 in {
  let Predicates = [UseAVX] in
  def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
                          VEX, Sched<[WriteLoad]>;
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))],
                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
  def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;

  def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
                         IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))],
                         IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
  def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                         IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
let isCodeGenOnly = 1 in {
  def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (bitconvert FR32:$src))],
                        IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
  def VMOVSS2DImr  : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                        IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
  def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (bitconvert FR32:$src))],
                        IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
  def MOVSS2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                        IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}", // X86-64 only
                       [(set VR128:$dst, (v2i64 (X86vzmovl
                                      (v2i64 (scalar_to_vector GR64:$src)))))],
                                      IIC_SSE_MOVDQ>,
                                      VEX, VEX_W;
def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
                       [(set VR128:$dst, (v2i64 (X86vzmovl
                                      (v2i64 (scalar_to_vector GR64:$src)))))],
                                      IIC_SSE_MOVDQ>;
}
} // isCodeGenOnly, SchedRW

let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIrr GR32:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
  }
  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  let AddedComplexity = 15 in
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (MOVDI2PDIrr GR32:$src)>;

  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
  }
}

// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//

let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
                     VEX, Requires<[UseAVX]>;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
                    IIC_SSE_MOVDQ>, XS,
                    Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (vector_extract (v2i64 VR128:$src),
                                      (iPTR 0))), addr:$dst)],
                        IIC_SSE_MOVDQ>, VEX;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (vector_extract (v2i64 VR128:$src),
                                    (iPTR 0))), addr:$dst)],
                      IIC_SSE_MOVDQ>;
} // SchedRW

//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
def VMOVLQ128mr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
                       Sched<[WriteStore]>;
def MOVLQ128mr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
                     IIC_SSE_MOVDQ>, Sched<[WriteStore]>;

// Load a 64-bit value into the low quadword of an XMM register, zeroing the
// upper quadword.
let isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                  (loadi64 addr:$src))))))],
                      IIC_SSE_MOVDQ>,
                      XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;

def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                 (loadi64 addr:$src))))))],
                     IIC_SSE_MOVDQ>,
                     XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
}

let Predicates = [UseAVX], AddedComplexity = 20 in {
  def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
            (VMOVZQI2PQIrm addr:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVZQI2PQIrm addr:$src)>;
}

let Predicates = [UseSSE2], AddedComplexity = 20 in {
  def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
            (MOVZQI2PQIrm addr:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i64 (alignedX86vzload addr:$src)),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
          (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
                         IIC_SSE_MOVQ_RR>,
                         XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
                        IIC_SSE_MOVQ_RR>,
                        XS, Requires<[UseSSE2]>;
} // SchedRW

let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v2i64 (X86vzmovl
                                                   (loadv2i64 addr:$src))))],
                         IIC_SSE_MOVDQ>,
                         XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                                  (loadv2i64 addr:$src))))],
                        IIC_SSE_MOVDQ>,
                        XS, Requires<[UseSSE2]>;
}
} // isCodeGenOnly, SchedRW

let AddedComplexity = 20 in {
  let Predicates = [UseAVX] in {
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
              (VMOVZPQILo2PQIrr VR128:$src)>;
  }
  let Predicates = [UseSSE2] in {
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
              (MOVZPQILo2PQIrr VR128:$src)>;
  }
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (vt (OpNode RC:$src)))],
              IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
              IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}

let Predicates = [HasAVX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem>, VEX;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem>, VEX;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem>,
                                       VEX, VEX_L;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem>,
                                       VEX, VEX_L;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                   memopv4f32, f128mem>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                                   memopv4f32, f128mem>;

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst,
                 (v2f64 (X86Movddup
                         (scalar_to_vector (loadf64 addr:$src)))))],
               IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}

// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
               Sched<[WriteShuffle]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst,
                 (v4f64 (X86Movddup
                         (scalar_to_vector (loadf64 addr:$src)))))]>,
               Sched<[WriteShuffleLd]>;
}

let Predicates = [HasAVX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup">;

let Predicates = [HasAVX] in {
  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;

  // 256-bit version
  def : Pat<(X86Movddup (loadv4f64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 VR256:$src)),
            (VMOVDDUPYrr VR256:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                      VEX;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                       VEX, VEX_L;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
                   IIC_SSE_LDDQU>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, OpndItins itins,
                       bit Is2Addr = 1> {
  def rr : I<0xD0, MRMSrcReg,
       (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm : I<0xD0, MRMSrcMem,
       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       // Use the memory-form itinerary here; this previously passed itins.rr,
       // which mis-modeled the folded-load form on itinerary-based subtargets.
       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
                                 f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
    defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps",
                                  VR256, f256mem, SSE_ALU_F32P, 0>, TB, XD,
                                  VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                 f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
    defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd",
                                  VR256, f256mem, SSE_ALU_F64P, 0>, TB, OpSize,
                                  VEX_4V, VEX_L;
  }
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
                              f128mem, SSE_ALU_F32P>, TB, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
                              f128mem, SSE_ALU_F64P>, TB, OpSize;
}

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
       Sched<[WriteFAdd]>;

  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
       IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
       Sched<[WriteFAdd]>;

  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
       IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                            X86fhadd, 0>, VEX_4V;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                            X86fhsub, 0>, VEX_4V;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                            X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                            X86fhsub, 0>, VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
                            X86fhadd, 0>, VEX_4V;
    defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
                            X86fhsub, 0>, VEX_4V;
    defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
                            X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
                            X86fhsub, 0>, VEX_4V, VEX_L;
  }
}

let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
  }
  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
  }
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//


/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                            Intrinsic IntId128> {
  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))],
                    IIC_SSE_PABS_RR>,
                    OpSize, Sched<[WriteVecALU]>;

  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv2i64 addr:$src))))],
                    IIC_SSE_PABS_RM>,
                    OpSize, Sched<[WriteVecALULd]>;
}

/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId256> {
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
                    OpSize, Sched<[WriteVecALU]>;

  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins i256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                      (IntId256
                       (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
                    Sched<[WriteVecALULd]>;
}

// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
                                               VR128:$src))>;
def v8i1sextv8i16  : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
def v4i1sextv4i32  : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
                                               VR256:$src))>;
def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
def v8i1sextv8i32  : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;

let Predicates = [HasAVX] in {
  defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
                                 int_x86_ssse3_pabs_b_128>, VEX;
  defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
                                 int_x86_ssse3_pabs_w_128>, VEX;
  defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
                                 int_x86_ssse3_pabs_d_128>, VEX;

  // Match the open-coded abs idiom xor(sext(x), add(x, sext(x))) back to PABS.
  def : Pat<(xor
            (bc_v2i64 (v16i1sextv16i8)),
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (VPABSBrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v8i1sextv8i16)),
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (VPABSWrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v4i1sextv4i32)),
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (VPABSDrr128 VR128:$src)>;
}

let Predicates = [HasAVX2] in {
  defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
                                   int_x86_avx2_pabs_b>, VEX, VEX_L;
  defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
                                   int_x86_avx2_pabs_w>, VEX, VEX_L;
  defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
                                   int_x86_avx2_pabs_d>, VEX, VEX_L;

  def : Pat<(xor
            (bc_v4i64 (v32i1sextv32i8)),
            (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
            (VPABSBrr256 VR256:$src)>;
  def : Pat<(xor
            (bc_v4i64 (v16i1sextv16i16)),
            (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
            (VPABSWrr256 VR256:$src)>;
  def : Pat<(xor
            (bc_v4i64 (v8i1sextv8i32)),
            (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
            (VPABSDrr256 VR256:$src)>;
}

defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
                              int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
                              int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
                              int_x86_ssse3_pabs_d_128>;

let Predicates = [HasSSSE3] in {
  def : Pat<(xor
            (bc_v2i64 (v16i1sextv16i8)),
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (PABSBrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v8i1sextv8i16)),
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (PABSWrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v4i1sextv4i32)),
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (PABSDrr128 VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
  IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
def SSE_PHADDSUBSW : OpndItins<
  IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
>;
def SSE_PHADDSUBW : OpndItins<
  IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
}
let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
  IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
  IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
  IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;

/// SS3I_binop_rm - Simple SSSE3 bin op
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                         X86MemOperand x86memop, OpndItins itins,
                         bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
       OpSize, Sched<[itins.Sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1,
                (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, OpndItins itins,
                             bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       OpSize, Sched<[itins.Sched]>;
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256> {
  let isCommutable = 1 in
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       OpSize;
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1,
          (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}

let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPSIGNB    : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNW    : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGND    : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSHUFB, 0>, VEX_4V;
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw_128,
                                      SSE_PMADD, 0>, VEX_4V;
}
defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
                                      int_x86_ssse3_pmul_hr_sw_128,
                                      SSE_PMULHRSW, 0>, VEX_4V;
}

// Note: the itineraries below previously all reused SSE_PHADDSUBW; the dword,
// psign and pshufb variants now use the same itinerary classes as the SSE and
// 128-bit AVX definitions above.
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSHUFB, 0>, VEX_4V, VEX_L;
  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                        int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                        int_x86_avx2_phsub_sw>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                        int_x86_avx2_pmadd_ub_sw>,
                                        VEX_4V, VEX_L;
}
defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                        int_x86_avx2_pmul_hr_sw>,
                                        VEX_4V, VEX_L;
}

// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PSIGNB    : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGNW    : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGND    : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
                                 memopv2i64, i128mem, SSE_PSHUFB>;
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
                                     int_x86_ssse3_pmadd_ub_sw_128,
                                     SSE_PMADD>;
}
defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw",
                                     int_x86_ssse3_pmul_hr_sw_128,
                                     SSE_PMULHRSW>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
  let neverHasSideEffects = 1 in {
  def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>;
  let mayLoad = 1 in
  def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}

multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
  let neverHasSideEffects = 1 in {
  def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
      (ins VR256:$src1, VR256:$src2, i8imm:$src3),
      !strconcat(asm,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      []>, OpSize, Sched<[WriteShuffle]>;
  let mayLoad = 1 in
  def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
      (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
      !strconcat(asm,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}

let Predicates = [HasAVX] in
  defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
  defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGN : ssse3_palignr<"palignr">;

// The patterns below commute the two register operands ($src2, $src1) when
// selecting the instruction.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
// (MONITOR/MWAIT are SSE3 instructions, not SSSE3 — note the HasSSE3
// predicates and int_x86_sse3_* intrinsics below.)
//===---------------------------------------------------------------------===//

let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
                Requires<[HasSSE3]>;
}

let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [],
                   IIC_SSE_MONITOR>, TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
                TB, Requires<[HasSSE3]>;
} // SchedRW
SchedRW 5428 5429def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[In32BitMode]>; 5430def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; 5431 5432def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>, 5433 Requires<[In32BitMode]>; 5434def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, 5435 Requires<[In64BitMode]>; 5436 5437//===----------------------------------------------------------------------===// 5438// SSE4.1 - Packed Move with Sign/Zero Extend 5439//===----------------------------------------------------------------------===// 5440 5441multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId, 5442 OpndItins itins = DEFAULT_ITINS> { 5443 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5444 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5445 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; 5446 5447 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 5448 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5449 [(set VR128:$dst, 5450 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], 5451 itins.rm>, OpSize; 5452} 5453 5454multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr, 5455 Intrinsic IntId> { 5456 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5457 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5458 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; 5459 5460 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 5461 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5462 [(set VR256:$dst, (IntId (load addr:$src)))]>, 5463 OpSize; 5464} 5465 5466let Predicates = [HasAVX] in { 5467defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", 5468 int_x86_sse41_pmovsxbw>, VEX; 5469defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", 5470 int_x86_sse41_pmovsxwd>, VEX; 5471defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, 
"vpmovsxdq", 5472 int_x86_sse41_pmovsxdq>, VEX; 5473defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", 5474 int_x86_sse41_pmovzxbw>, VEX; 5475defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", 5476 int_x86_sse41_pmovzxwd>, VEX; 5477defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", 5478 int_x86_sse41_pmovzxdq>, VEX; 5479} 5480 5481let Predicates = [HasAVX2] in { 5482defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", 5483 int_x86_avx2_pmovsxbw>, VEX, VEX_L; 5484defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", 5485 int_x86_avx2_pmovsxwd>, VEX, VEX_L; 5486defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", 5487 int_x86_avx2_pmovsxdq>, VEX, VEX_L; 5488defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", 5489 int_x86_avx2_pmovzxbw>, VEX, VEX_L; 5490defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", 5491 int_x86_avx2_pmovzxwd>, VEX, VEX_L; 5492defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", 5493 int_x86_avx2_pmovzxdq>, VEX, VEX_L; 5494} 5495 5496defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>; 5497defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>; 5498defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>; 5499defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>; 5500defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>; 5501defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>; 5502 5503let Predicates = [HasAVX] in { 5504 // Common patterns involving scalar load. 
5505 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5506 (VPMOVSXBWrm addr:$src)>; 5507 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5508 (VPMOVSXBWrm addr:$src)>; 5509 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5510 (VPMOVSXBWrm addr:$src)>; 5511 5512 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5513 (VPMOVSXWDrm addr:$src)>; 5514 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5515 (VPMOVSXWDrm addr:$src)>; 5516 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5517 (VPMOVSXWDrm addr:$src)>; 5518 5519 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5520 (VPMOVSXDQrm addr:$src)>; 5521 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5522 (VPMOVSXDQrm addr:$src)>; 5523 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5524 (VPMOVSXDQrm addr:$src)>; 5525 5526 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5527 (VPMOVZXBWrm addr:$src)>; 5528 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5529 (VPMOVZXBWrm addr:$src)>; 5530 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5531 (VPMOVZXBWrm addr:$src)>; 5532 5533 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5534 (VPMOVZXWDrm addr:$src)>; 5535 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5536 (VPMOVZXWDrm addr:$src)>; 5537 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5538 (VPMOVZXWDrm addr:$src)>; 5539 5540 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5541 (VPMOVZXDQrm addr:$src)>; 5542 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5543 (VPMOVZXDQrm addr:$src)>; 5544 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5545 (VPMOVZXDQrm addr:$src)>; 5546} 5547 5548let Predicates = [UseSSE41] in { 5549 // Common patterns involving scalar load. 
5550 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5551 (PMOVSXBWrm addr:$src)>; 5552 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5553 (PMOVSXBWrm addr:$src)>; 5554 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5555 (PMOVSXBWrm addr:$src)>; 5556 5557 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5558 (PMOVSXWDrm addr:$src)>; 5559 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5560 (PMOVSXWDrm addr:$src)>; 5561 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5562 (PMOVSXWDrm addr:$src)>; 5563 5564 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5565 (PMOVSXDQrm addr:$src)>; 5566 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5567 (PMOVSXDQrm addr:$src)>; 5568 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5569 (PMOVSXDQrm addr:$src)>; 5570 5571 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5572 (PMOVZXBWrm addr:$src)>; 5573 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5574 (PMOVZXBWrm addr:$src)>; 5575 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5576 (PMOVZXBWrm addr:$src)>; 5577 5578 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5579 (PMOVZXWDrm addr:$src)>; 5580 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5581 (PMOVZXWDrm addr:$src)>; 5582 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5583 (PMOVZXWDrm addr:$src)>; 5584 5585 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5586 (PMOVZXDQrm addr:$src)>; 5587 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5588 (PMOVZXDQrm addr:$src)>; 5589 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5590 (PMOVZXDQrm addr:$src)>; 5591} 5592 5593let Predicates = [HasAVX2] in { 5594 let AddedComplexity = 15 in { 5595 def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))), 5596 (VPMOVZXDQYrr VR128:$src)>; 5597 def : Pat<(v8i32 
(X86vzmovly (v8i16 VR128:$src))), 5598 (VPMOVZXWDYrr VR128:$src)>; 5599 def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))), 5600 (VPMOVZXBWYrr VR128:$src)>; 5601 } 5602 5603 def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; 5604 def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; 5605 def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; 5606} 5607 5608let Predicates = [HasAVX] in { 5609 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; 5610 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; 5611 def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; 5612} 5613 5614let Predicates = [UseSSE41] in { 5615 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; 5616 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; 5617 def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; 5618} 5619 5620 5621multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId, 5622 OpndItins itins = DEFAULT_ITINS> { 5623 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5624 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5625 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; 5626 5627 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 5628 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5629 [(set VR128:$dst, 5630 (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], 5631 itins.rm>, 5632 OpSize; 5633} 5634 5635multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr, 5636 Intrinsic IntId> { 5637 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5638 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5639 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; 5640 5641 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src), 5642 !strconcat(OpcodeStr, 
"\t{$src, $dst|$dst, $src}"), 5643 [(set VR256:$dst, 5644 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>, 5645 OpSize; 5646} 5647 5648let Predicates = [HasAVX] in { 5649defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, 5650 VEX; 5651defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, 5652 VEX; 5653defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, 5654 VEX; 5655defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, 5656 VEX; 5657} 5658 5659let Predicates = [HasAVX2] in { 5660defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", 5661 int_x86_avx2_pmovsxbd>, VEX, VEX_L; 5662defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", 5663 int_x86_avx2_pmovsxwq>, VEX, VEX_L; 5664defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", 5665 int_x86_avx2_pmovzxbd>, VEX, VEX_L; 5666defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", 5667 int_x86_avx2_pmovzxwq>, VEX, VEX_L; 5668} 5669 5670defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd, 5671 SSE_INTALU_ITINS_P>; 5672defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq, 5673 SSE_INTALU_ITINS_P>; 5674defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd, 5675 SSE_INTALU_ITINS_P>; 5676defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq, 5677 SSE_INTALU_ITINS_P>; 5678 5679let Predicates = [HasAVX] in { 5680 // Common patterns involving scalar load 5681 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5682 (VPMOVSXBDrm addr:$src)>; 5683 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5684 (VPMOVSXWQrm addr:$src)>; 5685 5686 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5687 (VPMOVZXBDrm addr:$src)>; 5688 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5689 (VPMOVZXWQrm addr:$src)>; 5690} 5691 5692let Predicates = [UseSSE41] in { 
5693 // Common patterns involving scalar load 5694 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5695 (PMOVSXBDrm addr:$src)>; 5696 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5697 (PMOVSXWQrm addr:$src)>; 5698 5699 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5700 (PMOVZXBDrm addr:$src)>; 5701 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5702 (PMOVZXWQrm addr:$src)>; 5703} 5704 5705multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId, 5706 OpndItins itins = DEFAULT_ITINS> { 5707 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5708 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5709 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize; 5710 5711 // Expecting a i16 load any extended to i32 value. 5712 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src), 5713 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5714 [(set VR128:$dst, (IntId (bitconvert 5715 (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>, 5716 OpSize; 5717} 5718 5719multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr, 5720 Intrinsic IntId> { 5721 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5722 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5723 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; 5724 5725 // Expecting a i16 load any extended to i32 value. 
5726 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src), 5727 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5728 [(set VR256:$dst, (IntId (bitconvert 5729 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>, 5730 OpSize; 5731} 5732 5733let Predicates = [HasAVX] in { 5734defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, 5735 VEX; 5736defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, 5737 VEX; 5738} 5739let Predicates = [HasAVX2] in { 5740defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", 5741 int_x86_avx2_pmovsxbq>, VEX, VEX_L; 5742defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", 5743 int_x86_avx2_pmovzxbq>, VEX, VEX_L; 5744} 5745defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq, 5746 SSE_INTALU_ITINS_P>; 5747defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq, 5748 SSE_INTALU_ITINS_P>; 5749 5750let Predicates = [HasAVX2] in { 5751 def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; 5752 def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>; 5753 def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>; 5754 5755 def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; 5756 def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; 5757 5758 def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; 5759 5760 def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), 5761 (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5762 def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), 5763 (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5764 def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), 5765 (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5766 5767 def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), 5768 (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5769 def : Pat<(v4i64 (X86vsext (v16i16 
VR256:$src))), 5770 (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5771 5772 def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), 5773 (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5774 5775 def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), 5776 (VPMOVSXWDYrm addr:$src)>; 5777 def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), 5778 (VPMOVSXDQYrm addr:$src)>; 5779 5780 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 5781 (scalar_to_vector (loadi64 addr:$src))))))), 5782 (VPMOVSXBDYrm addr:$src)>; 5783 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64 5784 (scalar_to_vector (loadf64 addr:$src))))))), 5785 (VPMOVSXBDYrm addr:$src)>; 5786 5787 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64 5788 (scalar_to_vector (loadi64 addr:$src))))))), 5789 (VPMOVSXWQYrm addr:$src)>; 5790 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64 5791 (scalar_to_vector (loadf64 addr:$src))))))), 5792 (VPMOVSXWQYrm addr:$src)>; 5793 5794 def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32 5795 (scalar_to_vector (loadi32 addr:$src))))))), 5796 (VPMOVSXBQYrm addr:$src)>; 5797} 5798 5799let Predicates = [HasAVX] in { 5800 // Common patterns involving scalar load 5801 def : Pat<(int_x86_sse41_pmovsxbq 5802 (bitconvert (v4i32 (X86vzmovl 5803 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5804 (VPMOVSXBQrm addr:$src)>; 5805 5806 def : Pat<(int_x86_sse41_pmovzxbq 5807 (bitconvert (v4i32 (X86vzmovl 5808 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5809 (VPMOVZXBQrm addr:$src)>; 5810} 5811 5812let Predicates = [UseSSE41] in { 5813 def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; 5814 def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; 5815 def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; 5816 5817 def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; 5818 def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr 
VR128:$src)>; 5819 5820 def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; 5821 5822 // Common patterns involving scalar load 5823 def : Pat<(int_x86_sse41_pmovsxbq 5824 (bitconvert (v4i32 (X86vzmovl 5825 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5826 (PMOVSXBQrm addr:$src)>; 5827 5828 def : Pat<(int_x86_sse41_pmovzxbq 5829 (bitconvert (v4i32 (X86vzmovl 5830 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5831 (PMOVZXBQrm addr:$src)>; 5832 5833 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 5834 (scalar_to_vector (loadi64 addr:$src))))))), 5835 (PMOVSXWDrm addr:$src)>; 5836 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 5837 (scalar_to_vector (loadf64 addr:$src))))))), 5838 (PMOVSXWDrm addr:$src)>; 5839 def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 5840 (scalar_to_vector (loadi32 addr:$src))))))), 5841 (PMOVSXBDrm addr:$src)>; 5842 def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 5843 (scalar_to_vector (loadi32 addr:$src))))))), 5844 (PMOVSXWQrm addr:$src)>; 5845 def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 5846 (scalar_to_vector (extloadi32i16 addr:$src))))))), 5847 (PMOVSXBQrm addr:$src)>; 5848 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 5849 (scalar_to_vector (loadi64 addr:$src))))))), 5850 (PMOVSXDQrm addr:$src)>; 5851 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 5852 (scalar_to_vector (loadf64 addr:$src))))))), 5853 (PMOVSXDQrm addr:$src)>; 5854 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 5855 (scalar_to_vector (loadi64 addr:$src))))))), 5856 (PMOVSXBWrm addr:$src)>; 5857 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 5858 (scalar_to_vector (loadf64 addr:$src))))))), 5859 (PMOVSXBWrm addr:$src)>; 5860} 5861 5862let Predicates = [HasAVX2] in { 5863 def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>; 5864 def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>; 5865 def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), 
(VPMOVZXBQYrr VR128:$src)>; 5866 5867 def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>; 5868 def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; 5869 5870 def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; 5871 5872 def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))), 5873 (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5874 def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))), 5875 (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5876 def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))), 5877 (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5878 5879 def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))), 5880 (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5881 def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))), 5882 (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5883 5884 def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))), 5885 (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; 5886} 5887 5888let Predicates = [HasAVX] in { 5889 def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>; 5890 def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>; 5891 def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>; 5892 5893 def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>; 5894 def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>; 5895 5896 def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>; 5897 5898 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 5899 (VPMOVZXBWrm addr:$src)>; 5900 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 5901 (VPMOVZXBWrm addr:$src)>; 5902 def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5903 (VPMOVZXBDrm addr:$src)>; 5904 def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 
(scalar_to_vector (loadi16_anyext addr:$src))))))), 5905 (VPMOVZXBQrm addr:$src)>; 5906 5907 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 5908 (VPMOVZXWDrm addr:$src)>; 5909 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 5910 (VPMOVZXWDrm addr:$src)>; 5911 def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5912 (VPMOVZXWQrm addr:$src)>; 5913 5914 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 5915 (VPMOVZXDQrm addr:$src)>; 5916 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 5917 (VPMOVZXDQrm addr:$src)>; 5918 def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), 5919 (VPMOVZXDQrm addr:$src)>; 5920 5921 def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; 5922 def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; 5923 def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; 5924 5925 def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; 5926 def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>; 5927 5928 def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; 5929 5930 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 5931 (scalar_to_vector (loadi64 addr:$src))))))), 5932 (VPMOVSXWDrm addr:$src)>; 5933 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 5934 (scalar_to_vector (loadi64 addr:$src))))))), 5935 (VPMOVSXDQrm addr:$src)>; 5936 def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 5937 (scalar_to_vector (loadf64 addr:$src))))))), 5938 (VPMOVSXWDrm addr:$src)>; 5939 def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 5940 (scalar_to_vector (loadf64 addr:$src))))))), 5941 (VPMOVSXDQrm addr:$src)>; 5942 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 
5943 (scalar_to_vector (loadi64 addr:$src))))))), 5944 (VPMOVSXBWrm addr:$src)>; 5945 def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 5946 (scalar_to_vector (loadf64 addr:$src))))))), 5947 (VPMOVSXBWrm addr:$src)>; 5948 5949 def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 5950 (scalar_to_vector (loadi32 addr:$src))))))), 5951 (VPMOVSXBDrm addr:$src)>; 5952 def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 5953 (scalar_to_vector (loadi32 addr:$src))))))), 5954 (VPMOVSXWQrm addr:$src)>; 5955 def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 5956 (scalar_to_vector (extloadi32i16 addr:$src))))))), 5957 (VPMOVSXBQrm addr:$src)>; 5958} 5959 5960let Predicates = [UseSSE41] in { 5961 def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>; 5962 def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>; 5963 def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>; 5964 5965 def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>; 5966 def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>; 5967 5968 def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>; 5969 5970 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 5971 (PMOVZXBWrm addr:$src)>; 5972 def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 5973 (PMOVZXBWrm addr:$src)>; 5974 def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), 5975 (PMOVZXBDrm addr:$src)>; 5976 def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), 5977 (PMOVZXBQrm addr:$src)>; 5978 5979 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 5980 (PMOVZXWDrm addr:$src)>; 5981 def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), 5982 (PMOVZXWDrm 
addr:$src)>;
  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVZXWQrm addr:$src)>;

  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (PMOVZXDQrm addr:$src)>;
  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
            (PMOVZXDQrm addr:$src)>;
  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
            (PMOVZXDQrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
// NOTE(review): header previously said "SS41I_binop_ext8", which does not
// match the multiclass name below.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                                   imm:$src2))]>,
                 OpSize;
  let neverHasSideEffects = 1, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   []>, OpSize;

  let neverHasSideEffects = 1, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                   (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                         addr:$dst)]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
// NOTE(review): header previously duplicated the SS41I_extract32 comment.
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                   (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                         addr:$dst)]>, OpSize, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
                            OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
                 itins.rr>,
           OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                         addr:$dst)], itins.rm>, OpSize;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}

// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
// EXTRACTPS stores normally produce an i32 store; also match the f32 form.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[HasAVX]>;
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[UseSSE41]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

// Insert a byte (from a GPR or an i8 load) into a v16i8 lane.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                   imm:$src3))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;

// Insert a 32-bit GPR or i32 load into a v4i32 lane.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

// Insert a 64-bit GPR or i64 load into a v2i64 lane (REX.W / VEX.W forms).
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector.
// The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
                           OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1,
                    (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                    imm:$src3))], itins.rm>, OpSize;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

// Packed round (roundps/roundpd and the VEX forms), intrinsic patterns only.
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>,
                    OpSize;

  // Vector intrinsic operation, mem
  def PSm : SS4AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>,
                    OpSize;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                               "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>,
                    OpSize;

  // Vector intrinsic operation, mem
  // NOTE(review): this memory form was tagged IIC_SSE_ROUNDPS_REG; use the
  // _MEM itinerary to match the parallel PSm definition above.
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                               "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>,
                    OpSize;
} // ExeDomain = SSEPackedDouble
}

multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             string OpcodeStr,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
  // Operation, reg.
6303 let hasSideEffects = 0 in 6304 def SSr : SS4AIi8<opcss, MRMSrcReg, 6305 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3), 6306 !if(Is2Addr, 6307 !strconcat(OpcodeStr, 6308 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6309 !strconcat(OpcodeStr, 6310 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6311 []>, OpSize; 6312 6313 // Intrinsic operation, reg. 6314 def SSr_Int : SS4AIi8<opcss, MRMSrcReg, 6315 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), 6316 !if(Is2Addr, 6317 !strconcat(OpcodeStr, 6318 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6319 !strconcat(OpcodeStr, 6320 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6321 [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, 6322 OpSize; 6323 6324 // Intrinsic operation, mem. 6325 def SSm : SS4AIi8<opcss, MRMSrcMem, 6326 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), 6327 !if(Is2Addr, 6328 !strconcat(OpcodeStr, 6329 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6330 !strconcat(OpcodeStr, 6331 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6332 [(set VR128:$dst, 6333 (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, 6334 OpSize; 6335 6336 // Operation, reg. 6337 let hasSideEffects = 0 in 6338 def SDr : SS4AIi8<opcsd, MRMSrcReg, 6339 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3), 6340 !if(Is2Addr, 6341 !strconcat(OpcodeStr, 6342 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6343 !strconcat(OpcodeStr, 6344 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6345 []>, OpSize; 6346 6347 // Intrinsic operation, reg. 
6348 def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, 6349 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), 6350 !if(Is2Addr, 6351 !strconcat(OpcodeStr, 6352 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6353 !strconcat(OpcodeStr, 6354 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6355 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, 6356 OpSize; 6357 6358 // Intrinsic operation, mem. 6359 def SDm : SS4AIi8<opcsd, MRMSrcMem, 6360 (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), 6361 !if(Is2Addr, 6362 !strconcat(OpcodeStr, 6363 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6364 !strconcat(OpcodeStr, 6365 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6366 [(set VR128:$dst, 6367 (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, 6368 OpSize; 6369} // ExeDomain = GenericDomain 6370} 6371 6372// FP round - roundss, roundps, roundsd, roundpd 6373let Predicates = [HasAVX] in { 6374 // Intrinsic form 6375 defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, 6376 loadv4f32, loadv2f64, 6377 int_x86_sse41_round_ps, 6378 int_x86_sse41_round_pd>, VEX; 6379 defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, 6380 loadv8f32, loadv4f64, 6381 int_x86_avx_round_ps_256, 6382 int_x86_avx_round_pd_256>, VEX, VEX_L; 6383 defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", 6384 int_x86_sse41_round_ss, 6385 int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; 6386 6387 def : Pat<(ffloor FR32:$src), 6388 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; 6389 def : Pat<(f64 (ffloor FR64:$src)), 6390 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; 6391 def : Pat<(f32 (fnearbyint FR32:$src)), 6392 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; 6393 def : Pat<(f64 (fnearbyint FR64:$src)), 6394 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; 6395 def : Pat<(f32 (fceil FR32:$src)), 6396 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; 6397 
def : Pat<(f64 (fceil FR64:$src)), 6398 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; 6399 def : Pat<(f32 (frint FR32:$src)), 6400 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; 6401 def : Pat<(f64 (frint FR64:$src)), 6402 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; 6403 def : Pat<(f32 (ftrunc FR32:$src)), 6404 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; 6405 def : Pat<(f64 (ftrunc FR64:$src)), 6406 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; 6407 6408 def : Pat<(v4f32 (ffloor VR128:$src)), 6409 (VROUNDPSr VR128:$src, (i32 0x1))>; 6410 def : Pat<(v4f32 (fnearbyint VR128:$src)), 6411 (VROUNDPSr VR128:$src, (i32 0xC))>; 6412 def : Pat<(v4f32 (fceil VR128:$src)), 6413 (VROUNDPSr VR128:$src, (i32 0x2))>; 6414 def : Pat<(v4f32 (frint VR128:$src)), 6415 (VROUNDPSr VR128:$src, (i32 0x4))>; 6416 def : Pat<(v4f32 (ftrunc VR128:$src)), 6417 (VROUNDPSr VR128:$src, (i32 0x3))>; 6418 6419 def : Pat<(v2f64 (ffloor VR128:$src)), 6420 (VROUNDPDr VR128:$src, (i32 0x1))>; 6421 def : Pat<(v2f64 (fnearbyint VR128:$src)), 6422 (VROUNDPDr VR128:$src, (i32 0xC))>; 6423 def : Pat<(v2f64 (fceil VR128:$src)), 6424 (VROUNDPDr VR128:$src, (i32 0x2))>; 6425 def : Pat<(v2f64 (frint VR128:$src)), 6426 (VROUNDPDr VR128:$src, (i32 0x4))>; 6427 def : Pat<(v2f64 (ftrunc VR128:$src)), 6428 (VROUNDPDr VR128:$src, (i32 0x3))>; 6429 6430 def : Pat<(v8f32 (ffloor VR256:$src)), 6431 (VROUNDYPSr VR256:$src, (i32 0x1))>; 6432 def : Pat<(v8f32 (fnearbyint VR256:$src)), 6433 (VROUNDYPSr VR256:$src, (i32 0xC))>; 6434 def : Pat<(v8f32 (fceil VR256:$src)), 6435 (VROUNDYPSr VR256:$src, (i32 0x2))>; 6436 def : Pat<(v8f32 (frint VR256:$src)), 6437 (VROUNDYPSr VR256:$src, (i32 0x4))>; 6438 def : Pat<(v8f32 (ftrunc VR256:$src)), 6439 (VROUNDYPSr VR256:$src, (i32 0x3))>; 6440 6441 def : Pat<(v4f64 (ffloor VR256:$src)), 6442 (VROUNDYPDr VR256:$src, (i32 0x1))>; 6443 def : Pat<(v4f64 (fnearbyint VR256:$src)), 6444 (VROUNDYPDr VR256:$src, (i32 0xC))>; 6445 def : 
Pat<(v4f64 (fceil VR256:$src)), 6446 (VROUNDYPDr VR256:$src, (i32 0x2))>; 6447 def : Pat<(v4f64 (frint VR256:$src)), 6448 (VROUNDYPDr VR256:$src, (i32 0x4))>; 6449 def : Pat<(v4f64 (ftrunc VR256:$src)), 6450 (VROUNDYPDr VR256:$src, (i32 0x3))>; 6451} 6452 6453defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, 6454 memopv4f32, memopv2f64, 6455 int_x86_sse41_round_ps, int_x86_sse41_round_pd>; 6456let Constraints = "$src1 = $dst" in 6457defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", 6458 int_x86_sse41_round_ss, int_x86_sse41_round_sd>; 6459 6460let Predicates = [UseSSE41] in { 6461 def : Pat<(ffloor FR32:$src), 6462 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; 6463 def : Pat<(f64 (ffloor FR64:$src)), 6464 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; 6465 def : Pat<(f32 (fnearbyint FR32:$src)), 6466 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; 6467 def : Pat<(f64 (fnearbyint FR64:$src)), 6468 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; 6469 def : Pat<(f32 (fceil FR32:$src)), 6470 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; 6471 def : Pat<(f64 (fceil FR64:$src)), 6472 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; 6473 def : Pat<(f32 (frint FR32:$src)), 6474 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; 6475 def : Pat<(f64 (frint FR64:$src)), 6476 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; 6477 def : Pat<(f32 (ftrunc FR32:$src)), 6478 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; 6479 def : Pat<(f64 (ftrunc FR64:$src)), 6480 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; 6481 6482 def : Pat<(v4f32 (ffloor VR128:$src)), 6483 (ROUNDPSr VR128:$src, (i32 0x1))>; 6484 def : Pat<(v4f32 (fnearbyint VR128:$src)), 6485 (ROUNDPSr VR128:$src, (i32 0xC))>; 6486 def : Pat<(v4f32 (fceil VR128:$src)), 6487 (ROUNDPSr VR128:$src, (i32 0x2))>; 6488 def : Pat<(v4f32 (frint VR128:$src)), 6489 (ROUNDPSr VR128:$src, (i32 0x4))>; 6490 def : Pat<(v4f32 (ftrunc 
VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x3))>;

  def : Pat<(v2f64 (ffloor VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x3))>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                OpSize, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                OpSize, VEX;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                OpSize, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                OpSize, VEX, VEX_L;
}

let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              OpSize;
}

// The bit test instructions below are AVX only
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            OpSize, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
                VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
                VEX_L;
}
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                     OpSize, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                     XS;
  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RR>,
                      XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
}



// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128> {
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}

let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
                                         int_x86_sse41_phminposuw>;

/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1,
                              OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
       itins.rr>, OpSize;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv2i64 addr:$src2))))],
       itins.rm>, OpSize;
}

/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId256> {
  let isCommutable = 1 in
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1,
          (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}


/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1,
                          OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1,
                       (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in
  defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
                                      0>, VEX_4V;
  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
                                  loadv2i64, i128mem, 0>, VEX_4V;
  defm VPMULDQ   : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
                                      0>, VEX_4V;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in
  defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
                                        int_x86_avx2_packusdw>, VEX_4V, VEX_L;
  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPMULDQ   : SS41I_binop_rm_int_y<0x28, "vpmuldq",
                                        int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in
  defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq,
                                     1, SSE_INTMUL_ITINS_P>;
}

// NOTE(review): the VEX forms below previously used the alignment-restricted
// memopv2i64/memopv4i64 fragments; AVX memory operands need not be aligned,
// so use loadv2i64/loadv4i64 like the other HasAVX/HasAVX2 defms above.
let Predicates = [HasAVX] in {
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 loadv2i64, i128mem, 0>, VEX_4V;
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 loadv2i64, i128mem, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId, RegisterClass RC,
                               PatFrag memop_frag,
                               X86MemOperand x86memop, bit Is2Addr = 1,
                               OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
        OpSize;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
        OpSize;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
    let ExeDomain = SSEPackedSingle in {
    defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                        VR128, loadv4f32, f128mem, 0>, VEX_4V;
    defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
                                         int_x86_avx_blend_ps_256, VR256,
                                         loadv8f32, f256mem, 0>, VEX_4V, VEX_L;
    }
    let ExeDomain = SSEPackedDouble in {
    defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                        VR128, loadv2f64, f128mem, 0>, VEX_4V;
    defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
                                         int_x86_avx_blend_pd_256,VR256,
                                         loadv4f64, f256mem, 0>, VEX_4V, VEX_L;
    }
  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                      VR128, loadv2i64, i128mem, 0>, VEX_4V;
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, loadv2i64, i128mem, 0>, VEX_4V;
  }
  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, loadv4f32, f128mem, 0>, VEX_4V;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, loadv2f64, f128mem, 0>, VEX_4V;
  // NOTE(review): VDPPSY previously used i256mem; use f256mem to match the
  // float-typed memory operands of VDPPS/VDPPD/VBLENDPSY (same size/encoding).
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, loadv8f32, f256mem, 0>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
                                       VR256, loadv4i64, i256mem, 0>,
                                       VEX_4V, VEX_L;
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, loadv4i64, i256mem, 0>,
                                       VEX_4V, VEX_L;
  }
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  let ExeDomain = SSEPackedSingle in
  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
                                     VR128, memopv4f32, f128mem,
                                     1, SSE_INTALU_ITINS_P>;
  let ExeDomain = SSEPackedDouble in
  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
                                     VR128, memopv2f64, f128mem,
                                     1, SSE_INTALU_ITINS_P>;
  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_INTALU_ITINS_P>;
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_INTMUL_ITINS_P>;
  }
  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memopv4f32, f128mem, 1,
                                  SSE_DPPS_ITINS>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memopv2f64, f128mem, 1,
                                  SSE_DPPD_ITINS>;
}

/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId> {
  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
               NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;

  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set RC:$dst,
                 (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                        RC:$src3))],
               NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           loadv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           loadv4f64,
                                           int_x86_avx_blendv_pd_256>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           loadv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                           loadv8f32,
                                           int_x86_avx_blendv_ps_256>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           loadv2i64, int_x86_sse41_pblendvb>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
                                           loadv4i64, int_x86_avx2_pblendvb>,
                                           VEX_L;
}

let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;

  def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                              (imm:$mask))),
            (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
  def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                              (imm:$mask))),
            (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                              (imm:$mask))),
            (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                              (imm:$mask))),
            (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64
VR128:$src1), (v2f64 VR128:$src2), 6961 (imm:$mask))), 6962 (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; 6963} 6964 6965let Predicates = [HasAVX2] in { 6966 def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), 6967 (v32i8 VR256:$src2))), 6968 (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 6969 def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2), 6970 (imm:$mask))), 6971 (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>; 6972} 6973 6974/// SS41I_ternary_int - SSE 4.1 ternary operator 6975let Uses = [XMM0], Constraints = "$src1 = $dst" in { 6976 multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 6977 X86MemOperand x86memop, Intrinsic IntId, 6978 OpndItins itins = DEFAULT_ITINS> { 6979 def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 6980 (ins VR128:$src1, VR128:$src2), 6981 !strconcat(OpcodeStr, 6982 "\t{$src2, $dst|$dst, $src2}"), 6983 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], 6984 itins.rr>, OpSize; 6985 6986 def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 6987 (ins VR128:$src1, x86memop:$src2), 6988 !strconcat(OpcodeStr, 6989 "\t{$src2, $dst|$dst, $src2}"), 6990 [(set VR128:$dst, 6991 (IntId VR128:$src1, 6992 (bitconvert (mem_frag addr:$src2)), XMM0))], 6993 itins.rm>, OpSize; 6994 } 6995} 6996 6997let ExeDomain = SSEPackedDouble in 6998defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem, 6999 int_x86_sse41_blendvpd>; 7000let ExeDomain = SSEPackedSingle in 7001defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem, 7002 int_x86_sse41_blendvps>; 7003defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, 7004 int_x86_sse41_pblendvb>; 7005 7006// Aliases with the implicit xmm0 argument 7007def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7008 (BLENDVPDrr0 VR128:$dst, VR128:$src2)>; 7009def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7010 (BLENDVPDrm0 VR128:$dst, 
f128mem:$src2)>; 7011def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7012 (BLENDVPSrr0 VR128:$dst, VR128:$src2)>; 7013def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7014 (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>; 7015def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7016 (PBLENDVBrr0 VR128:$dst, VR128:$src2)>; 7017def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7018 (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; 7019 7020let Predicates = [UseSSE41] in { 7021 def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), 7022 (v16i8 VR128:$src2))), 7023 (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; 7024 def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1), 7025 (v4i32 VR128:$src2))), 7026 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 7027 def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1), 7028 (v4f32 VR128:$src2))), 7029 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 7030 def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1), 7031 (v2i64 VR128:$src2))), 7032 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 7033 def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), 7034 (v2f64 VR128:$src2))), 7035 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 7036 7037 def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2), 7038 (imm:$mask))), 7039 (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>; 7040 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2), 7041 (imm:$mask))), 7042 (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>; 7043 def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2), 7044 (imm:$mask))), 7045 (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>; 7046 7047} 7048 7049let Predicates = [HasAVX] in 7050def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 7051 "vmovntdqa\t{$src, $dst|$dst, $src}", 7052 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, 7053 OpSize, VEX; 7054let Predicates = [HasAVX2] in 7055def VMOVNTDQAYrm 
: SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 7056 "vmovntdqa\t{$src, $dst|$dst, $src}", 7057 [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, 7058 OpSize, VEX, VEX_L; 7059def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 7060 "movntdqa\t{$src, $dst|$dst, $src}", 7061 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, 7062 OpSize; 7063 7064//===----------------------------------------------------------------------===// 7065// SSE4.2 - Compare Instructions 7066//===----------------------------------------------------------------------===// 7067 7068/// SS42I_binop_rm - Simple SSE 4.2 binary operator 7069multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 7070 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 7071 X86MemOperand x86memop, bit Is2Addr = 1> { 7072 def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), 7073 (ins RC:$src1, RC:$src2), 7074 !if(Is2Addr, 7075 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7076 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7077 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 7078 OpSize; 7079 def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 7080 (ins RC:$src1, x86memop:$src2), 7081 !if(Is2Addr, 7082 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7083 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7084 [(set RC:$dst, 7085 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize; 7086} 7087 7088let Predicates = [HasAVX] in 7089 defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, 7090 loadv2i64, i128mem, 0>, VEX_4V; 7091 7092let Predicates = [HasAVX2] in 7093 defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, 7094 loadv4i64, i256mem, 0>, VEX_4V, VEX_L; 7095 7096let Constraints = "$src1 = $dst" in 7097 defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, 7098 memopv2i64, i128mem>; 7099 
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// Packed Compare Implicit Length Strings, Return Mask
// The pseudos carry the intrinsic patterns and are expanded via a custom
// inserter; the real SS42AI defs below have no patterns (assembler only).
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
                       (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}

multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, OpSize;
  let mayLoad = 1 in
  def rm : SS42AI<0x62, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, OpSize;
}

let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
  defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm">;
}

// Packed Compare Explicit Length Strings, Return Mask
// Explicit-length forms implicitly read the lengths from EAX and EDX.
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
                       (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpestrm<string asm> {
  def rr : SS42AI<0x60, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, OpSize;
  let mayLoad = 1 in
  def rm : SS42AI<0x60, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, OpSize;
}

let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
  defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">;
}

// Packed Compare Implicit Length Strings, Return Index
multiclass pseudo_pcmpistri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
                              (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
  defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, OpSize;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, OpSize;
}

let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}

// Packed Compare Explicit Length Strings, Return Index
multiclass pseudo_pcmpestri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
       imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
  defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, OpSize;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, OpSize;
}

let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>;

class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
         IIC_CRC32_MEM>;

let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize;
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>;
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  // 64-bit CRC32 of an 8-bit operand: null_frag, no intrinsic pattern —
  // assembler/disassembler support only (hasSideEffects = 0).
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// When UsesXMM0 is set, the intrinsic takes XMM0 as an extra implicit operand
// (see SHA256RNDS2 below, which is instantiated under Uses=[XMM0]).
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                    (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                                              (i8 imm:$src3)))]>, TA;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                             (bc_v4i32 (memopv2i64 addr:$src2)),
                             (i8 imm:$src3)))]>, TA;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;

  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
}

// Aliases with explicit %xmm0
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       OpSize;
  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
  defm VAESENC     : AESI_binop_rm_int<0xDC, "vaesenc",
                                       int_x86_aesni_aesenc, 0>, VEX_4V;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, 0>, VEX_4V;
  defm VAESDEC     : AESI_binop_rm_int<0xDE, "vaesdec",
                                       int_x86_aesni_aesdec, 0>, VEX_4V;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, 0>, VEX_4V;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC     : AESI_binop_rm_int<0xDC, "aesenc",
                                      int_x86_aesni_aesenc>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast>;
  defm AESDEC     : AESI_binop_rm_int<0xDE, "aesdec",
                                      int_x86_aesni_aesdec>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>,
      OpSize, VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
      OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst,
      (int_x86_aesni_aesimc VR128:$src1))]>,
    OpSize;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
    OpSize;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      OpSize, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
      OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
    OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
    OpSize;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// AVX carry-less Multiplication instructions
def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;

def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (loadv2i64 addr:$src2), imm:$src3))]>;

// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
           IIC_SSE_PCLMULQDQ_RR>;

def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (memopv2i64 addr:$src2), imm:$src3))],
           IIC_SSE_PCLMULQDQ_RM>;
} // Constraints = "$src1 = $dst"


// Map the pclmul{lq,hq}{lq,hq}dq convenience mnemonics onto pclmulqdq with
// the corresponding quadword-selector immediate (see the defms below).
multiclass pclmul_alias<string asm, int immop> {
  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;

  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
}
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
defm : pclmul_alias<"lqhq", 0x10>;
defm : pclmul_alias<"lqlq", 0x00>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst),
                 (ins VR128:$src, i8imm:$len, i8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
                                    imm:$idx))]>, TB, OpSize;
def EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src, VR128:$mask),
               "extrq\t{$mask, $src|$src, $mask}",
               [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                  VR128:$mask))]>, TB, OpSize;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
                                      VR128:$src2, imm:$len, imm:$idx))]>, XD;
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                    VR128:$mask))]>, XD;
}

def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
}

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
                    X86MemOperand x86memop, Intrinsic Int> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (Int addr:$src))]>, VEX;

// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         Intrinsic Int> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (Int VR128:$src))]>, VEX;

let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
                                      int_x86_avx_vbroadcast_ss>;
  def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
                                      int_x86_avx_vbroadcast_ss_256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
                                     int_x86_avx_vbroadcast_sd_256>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                   int_x86_avx_vbroadcastf128_pd_256>, VEX_L;

let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrr  : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
                                           int_x86_avx2_vbroadcast_ss_ps>;
  def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
                                           int_x86_avx2_vbroadcast_ss_ps_256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
                                          int_x86_avx2_vbroadcast_sd_pd_256>, VEX_L;

let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
                                   int_x86_avx2_vbroadcasti128>, VEX_L;

// The ps and pd forms of vbroadcastf128 are the same instruction; reuse the
// pd def for the ps intrinsic.
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
          (VBROADCASTF128 addr:$src)>;


//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX] in {
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}

// Without AVX2 there is no integer vinserti128, so integer 128-bit inserts
// also go through VINSERTF128.
let Predicates = [HasAVX1Only] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
                                  (bc_v16i8 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
                                  (bc_v8i16 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX, VEX_L;
}

// AVX1 patterns
let Predicates = [HasAVX] in {
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4f32 (VEXTRACTF128rr
                    (v8f32 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2f64 (VEXTRACTF128rr
                    (v4f64 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;

def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// Without AVX2, integer 128-bit extracts also go through VEXTRACTF128.
let Predicates = [HasAVX1Only] in {
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTF128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTF128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTF128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTF128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;

def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
                                                         (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
                                                         (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
                                                         (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
                                                         (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set
VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 7754 VEX_4V, VEX_L; 7755 def mr : AVX8I<opc_mr, MRMDestMem, (outs), 7756 (ins f128mem:$dst, VR128:$src1, VR128:$src2), 7757 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7758 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; 7759 def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), 7760 (ins f256mem:$dst, VR256:$src1, VR256:$src2), 7761 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7762 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; 7763} 7764 7765let ExeDomain = SSEPackedSingle in 7766defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", 7767 int_x86_avx_maskload_ps, 7768 int_x86_avx_maskload_ps_256, 7769 int_x86_avx_maskstore_ps, 7770 int_x86_avx_maskstore_ps_256>; 7771let ExeDomain = SSEPackedDouble in 7772defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", 7773 int_x86_avx_maskload_pd, 7774 int_x86_avx_maskload_pd_256, 7775 int_x86_avx_maskstore_pd, 7776 int_x86_avx_maskstore_pd_256>; 7777 7778//===----------------------------------------------------------------------===// 7779// VPERMIL - Permute Single and Double Floating-Point Values 7780// 7781multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, 7782 RegisterClass RC, X86MemOperand x86memop_f, 7783 X86MemOperand x86memop_i, PatFrag i_frag, 7784 Intrinsic IntVar, ValueType vt> { 7785 def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), 7786 (ins RC:$src1, RC:$src2), 7787 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7788 [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V; 7789 def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), 7790 (ins RC:$src1, x86memop_i:$src2), 7791 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7792 [(set RC:$dst, (IntVar RC:$src1, 7793 (bitconvert (i_frag addr:$src2))))]>, VEX_4V; 7794 7795 def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), 7796 (ins RC:$src1, i8imm:$src2), 7797 !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7798 [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX; 7799 def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), 7800 (ins x86memop_f:$src1, i8imm:$src2), 7801 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7802 [(set RC:$dst, 7803 (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX; 7804} 7805 7806let ExeDomain = SSEPackedSingle in { 7807 defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, 7808 loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>; 7809 defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, 7810 loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L; 7811} 7812let ExeDomain = SSEPackedDouble in { 7813 defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, 7814 loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>; 7815 defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, 7816 loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; 7817} 7818 7819let Predicates = [HasAVX] in { 7820def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), 7821 (VPERMILPSYri VR256:$src1, imm:$imm)>; 7822def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), 7823 (VPERMILPDYri VR256:$src1, imm:$imm)>; 7824def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 addr:$src1)), 7825 (i8 imm:$imm))), 7826 (VPERMILPSYmi addr:$src1, imm:$imm)>; 7827def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))), 7828 (VPERMILPDYmi addr:$src1, imm:$imm)>; 7829 7830def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))), 7831 (VPERMILPDri VR128:$src1, imm:$imm)>; 7832def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))), 7833 (VPERMILPDmi addr:$src1, imm:$imm)>; 7834} 7835 7836//===----------------------------------------------------------------------===// 7837// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks 7838// 7839let ExeDomain = SSEPackedSingle in { 
7840def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), 7841 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 7842 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7843 [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, 7844 (i8 imm:$src3))))]>, VEX_4V, VEX_L; 7845def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), 7846 (ins VR256:$src1, f256mem:$src2, i8imm:$src3), 7847 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7848 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2), 7849 (i8 imm:$src3)))]>, VEX_4V, VEX_L; 7850} 7851 7852let Predicates = [HasAVX] in { 7853def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7854 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7855def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, 7856 (loadv4f64 addr:$src2), (i8 imm:$imm))), 7857 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7858} 7859 7860let Predicates = [HasAVX1Only] in { 7861def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7862 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7863def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7864 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7865def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7866 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7867def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7868 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7869 7870def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, 7871 (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 7872 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7873def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, 7874 (loadv4i64 addr:$src2), (i8 imm:$imm))), 7875 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7876def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, 7877 (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 7878 (VPERM2F128rm VR256:$src1, 
addr:$src2, imm:$imm)>; 7879def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, 7880 (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 7881 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7882} 7883 7884//===----------------------------------------------------------------------===// 7885// VZERO - Zero YMM registers 7886// 7887let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, 7888 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { 7889 // Zero All YMM registers 7890 def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", 7891 [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; 7892 7893 // Zero Upper bits of YMM registers 7894 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", 7895 [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>; 7896} 7897 7898//===----------------------------------------------------------------------===// 7899// Half precision conversion instructions 7900//===----------------------------------------------------------------------===// 7901multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 7902 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 7903 "vcvtph2ps\t{$src, $dst|$dst, $src}", 7904 [(set RC:$dst, (Int VR128:$src))]>, 7905 T8, OpSize, VEX; 7906 let neverHasSideEffects = 1, mayLoad = 1 in 7907 def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 7908 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; 7909} 7910 7911multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 7912 def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), 7913 (ins RC:$src1, i32i8imm:$src2), 7914 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7915 [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, 7916 TA, OpSize, VEX; 7917 let neverHasSideEffects = 1, mayStore = 1 in 7918 def mr : Ii8<0x1D, MRMDestMem, (outs), 7919 (ins x86memop:$dst, RC:$src1, i32i8imm:$src2), 7920 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7921 
TA, OpSize, VEX; 7922} 7923 7924let Predicates = [HasF16C] in { 7925 defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>; 7926 defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L; 7927 defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; 7928 defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; 7929} 7930 7931//===----------------------------------------------------------------------===// 7932// AVX2 Instructions 7933//===----------------------------------------------------------------------===// 7934 7935/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate 7936multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, 7937 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 7938 X86MemOperand x86memop> { 7939 let isCommutable = 1 in 7940 def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst), 7941 (ins RC:$src1, RC:$src2, u32u8imm:$src3), 7942 !strconcat(OpcodeStr, 7943 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 7944 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, 7945 VEX_4V; 7946 def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), 7947 (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), 7948 !strconcat(OpcodeStr, 7949 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 7950 [(set RC:$dst, 7951 (IntId RC:$src1, 7952 (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, 7953 VEX_4V; 7954} 7955 7956let isCommutable = 0 in { 7957defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, 7958 VR128, loadv2i64, i128mem>; 7959defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, 7960 VR256, loadv4i64, i256mem>, VEX_L; 7961} 7962 7963def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), 7964 imm:$mask)), 7965 (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>; 7966def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), 7967 imm:$mask)), 7968 (VPBLENDDYrri VR256:$src1, VR256:$src2, 
imm:$mask)>; 7969 7970//===----------------------------------------------------------------------===// 7971// VPBROADCAST - Load from memory and broadcast to all elements of the 7972// destination operand 7973// 7974multiclass avx2_broadcast<bits<8> opc, string OpcodeStr, 7975 X86MemOperand x86memop, PatFrag ld_frag, 7976 Intrinsic Int128, Intrinsic Int256> { 7977 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 7978 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7979 [(set VR128:$dst, (Int128 VR128:$src))]>, VEX; 7980 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), 7981 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7982 [(set VR128:$dst, 7983 (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX; 7984 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 7985 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7986 [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L; 7987 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src), 7988 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7989 [(set VR256:$dst, 7990 (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, 7991 VEX, VEX_L; 7992} 7993 7994defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, 7995 int_x86_avx2_pbroadcastb_128, 7996 int_x86_avx2_pbroadcastb_256>; 7997defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, 7998 int_x86_avx2_pbroadcastw_128, 7999 int_x86_avx2_pbroadcastw_256>; 8000defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, 8001 int_x86_avx2_pbroadcastd_128, 8002 int_x86_avx2_pbroadcastd_256>; 8003defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, 8004 int_x86_avx2_pbroadcastq_128, 8005 int_x86_avx2_pbroadcastq_256>; 8006 8007let Predicates = [HasAVX2] in { 8008 def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), 8009 (VPBROADCASTBrm addr:$src)>; 8010 def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), 8011 
(VPBROADCASTBYrm addr:$src)>; 8012 def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), 8013 (VPBROADCASTWrm addr:$src)>; 8014 def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), 8015 (VPBROADCASTWYrm addr:$src)>; 8016 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 8017 (VPBROADCASTDrm addr:$src)>; 8018 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), 8019 (VPBROADCASTDYrm addr:$src)>; 8020 def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), 8021 (VPBROADCASTQrm addr:$src)>; 8022 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), 8023 (VPBROADCASTQYrm addr:$src)>; 8024 8025 def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), 8026 (VPBROADCASTBrr VR128:$src)>; 8027 def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), 8028 (VPBROADCASTBYrr VR128:$src)>; 8029 def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), 8030 (VPBROADCASTWrr VR128:$src)>; 8031 def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), 8032 (VPBROADCASTWYrr VR128:$src)>; 8033 def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), 8034 (VPBROADCASTDrr VR128:$src)>; 8035 def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), 8036 (VPBROADCASTDYrr VR128:$src)>; 8037 def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), 8038 (VPBROADCASTQrr VR128:$src)>; 8039 def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), 8040 (VPBROADCASTQYrr VR128:$src)>; 8041 def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), 8042 (VBROADCASTSSrr VR128:$src)>; 8043 def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), 8044 (VBROADCASTSSYrr VR128:$src)>; 8045 def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), 8046 (VPBROADCASTQrr VR128:$src)>; 8047 def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), 8048 (VBROADCASTSDYrr VR128:$src)>; 8049 8050 // Provide fallback in case the load node that is used in the patterns above 8051 // is used by additional users, which prevents the pattern selection. 
8052 let AddedComplexity = 20 in { 8053 def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 8054 (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; 8055 def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 8056 (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; 8057 def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 8058 (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; 8059 8060 def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 8061 (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; 8062 def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 8063 (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; 8064 def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 8065 (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; 8066 } 8067} 8068 8069// AVX1 broadcast patterns 8070let Predicates = [HasAVX1Only] in { 8071def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), 8072 (VBROADCASTSSYrm addr:$src)>; 8073def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), 8074 (VBROADCASTSDYrm addr:$src)>; 8075def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 8076 (VBROADCASTSSrm addr:$src)>; 8077} 8078 8079let Predicates = [HasAVX] in { 8080def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), 8081 (VBROADCASTSSYrm addr:$src)>; 8082def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), 8083 (VBROADCASTSDYrm addr:$src)>; 8084def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), 8085 (VBROADCASTSSrm addr:$src)>; 8086 8087 // Provide fallback in case the load node that is used in the patterns above 8088 // is used by additional users, which prevents the pattern selection. 
8089 let AddedComplexity = 20 in { 8090 // 128bit broadcasts: 8091 def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 8092 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; 8093 def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 8094 (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 8095 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), 8096 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; 8097 def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 8098 (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 8099 (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), 8100 (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; 8101 8102 def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 8103 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; 8104 def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 8105 (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 8106 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), 8107 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; 8108 def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 8109 (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), 8110 (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), 8111 (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; 8112 } 8113} 8114 8115//===----------------------------------------------------------------------===// 8116// VPERM - Permute instructions 8117// 8118 8119multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 8120 ValueType OpVT> { 8121 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 8122 (ins VR256:$src1, VR256:$src2), 8123 !strconcat(OpcodeStr, 8124 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8125 [(set VR256:$dst, 8126 (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, 8127 VEX_4V, VEX_L; 8128 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 8129 (ins VR256:$src1, i256mem:$src2), 8130 !strconcat(OpcodeStr, 8131 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8132 [(set VR256:$dst, 8133 (OpVT (X86VPermv VR256:$src1, 8134 
(bitconvert (mem_frag addr:$src2)))))]>, 8135 VEX_4V, VEX_L; 8136} 8137 8138defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>; 8139let ExeDomain = SSEPackedSingle in 8140defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>; 8141 8142multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 8143 ValueType OpVT> { 8144 def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), 8145 (ins VR256:$src1, i8imm:$src2), 8146 !strconcat(OpcodeStr, 8147 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8148 [(set VR256:$dst, 8149 (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, 8150 VEX, VEX_L; 8151 def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), 8152 (ins i256mem:$src1, i8imm:$src2), 8153 !strconcat(OpcodeStr, 8154 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8155 [(set VR256:$dst, 8156 (OpVT (X86VPermi (mem_frag addr:$src1), 8157 (i8 imm:$src2))))]>, VEX, VEX_L; 8158} 8159 8160defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W; 8161let ExeDomain = SSEPackedDouble in 8162defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W; 8163 8164//===----------------------------------------------------------------------===// 8165// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks 8166// 8167def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), 8168 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 8169 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8170 [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, 8171 (i8 imm:$src3))))]>, VEX_4V, VEX_L; 8172def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), 8173 (ins VR256:$src1, f256mem:$src2, i8imm:$src3), 8174 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8175 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), 8176 (i8 imm:$src3)))]>, VEX_4V, VEX_L; 8177 8178let Predicates = [HasAVX2] in { 8179def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8180 
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8181def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8182 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8183def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8184 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8185 8186def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)), 8187 (i8 imm:$imm))), 8188 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 8189def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, 8190 (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8191 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 8192def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)), 8193 (i8 imm:$imm))), 8194 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 8195} 8196 8197 8198//===----------------------------------------------------------------------===// 8199// VINSERTI128 - Insert packed integer values 8200// 8201let neverHasSideEffects = 1 in { 8202def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), 8203 (ins VR256:$src1, VR128:$src2, i8imm:$src3), 8204 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8205 []>, VEX_4V, VEX_L; 8206let mayLoad = 1 in 8207def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), 8208 (ins VR256:$src1, i128mem:$src2, i8imm:$src3), 8209 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8210 []>, VEX_4V, VEX_L; 8211} 8212 8213let Predicates = [HasAVX2] in { 8214def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), 8215 (iPTR imm)), 8216 (VINSERTI128rr VR256:$src1, VR128:$src2, 8217 (INSERT_get_vinsert128_imm VR256:$ins))>; 8218def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), 8219 (iPTR imm)), 8220 (VINSERTI128rr VR256:$src1, VR128:$src2, 8221 (INSERT_get_vinsert128_imm VR256:$ins))>; 8222def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), 8223 (iPTR 
imm)), 8224 (VINSERTI128rr VR256:$src1, VR128:$src2, 8225 (INSERT_get_vinsert128_imm VR256:$ins))>; 8226def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), 8227 (iPTR imm)), 8228 (VINSERTI128rr VR256:$src1, VR128:$src2, 8229 (INSERT_get_vinsert128_imm VR256:$ins))>; 8230 8231def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), 8232 (iPTR imm)), 8233 (VINSERTI128rm VR256:$src1, addr:$src2, 8234 (INSERT_get_vinsert128_imm VR256:$ins))>; 8235def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), 8236 (bc_v4i32 (loadv2i64 addr:$src2)), 8237 (iPTR imm)), 8238 (VINSERTI128rm VR256:$src1, addr:$src2, 8239 (INSERT_get_vinsert128_imm VR256:$ins))>; 8240def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), 8241 (bc_v16i8 (loadv2i64 addr:$src2)), 8242 (iPTR imm)), 8243 (VINSERTI128rm VR256:$src1, addr:$src2, 8244 (INSERT_get_vinsert128_imm VR256:$ins))>; 8245def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), 8246 (bc_v8i16 (loadv2i64 addr:$src2)), 8247 (iPTR imm)), 8248 (VINSERTI128rm VR256:$src1, addr:$src2, 8249 (INSERT_get_vinsert128_imm VR256:$ins))>; 8250} 8251 8252//===----------------------------------------------------------------------===// 8253// VEXTRACTI128 - Extract packed integer values 8254// 8255def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), 8256 (ins VR256:$src1, i8imm:$src2), 8257 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8258 [(set VR128:$dst, 8259 (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, 8260 VEX, VEX_L; 8261let neverHasSideEffects = 1, mayStore = 1 in 8262def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), 8263 (ins i128mem:$dst, VR256:$src1, i8imm:$src2), 8264 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8265 VEX, VEX_L; 8266 8267let Predicates = [HasAVX2] in { 8268def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8269 (v2i64 (VEXTRACTI128rr 8270 (v4i64 VR256:$src1), 8271 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 
8272def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8273 (v4i32 (VEXTRACTI128rr 8274 (v8i32 VR256:$src1), 8275 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8276def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8277 (v8i16 (VEXTRACTI128rr 8278 (v16i16 VR256:$src1), 8279 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8280def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8281 (v16i8 (VEXTRACTI128rr 8282 (v32i8 VR256:$src1), 8283 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8284 8285def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1), 8286 (iPTR imm))), addr:$dst), 8287 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8288 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8289def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), 8290 (iPTR imm))), addr:$dst), 8291 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8292 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8293def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), 8294 (iPTR imm))), addr:$dst), 8295 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8296 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8297def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), 8298 (iPTR imm))), addr:$dst), 8299 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8300 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8301} 8302 8303//===----------------------------------------------------------------------===// 8304// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores 8305// 8306multiclass avx2_pmovmask<string OpcodeStr, 8307 Intrinsic IntLd128, Intrinsic IntLd256, 8308 Intrinsic IntSt128, Intrinsic IntSt256> { 8309 def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), 8310 (ins VR128:$src1, i128mem:$src2), 8311 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8312 [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V; 8313 def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), 8314 (ins VR256:$src1, i256mem:$src2), 8315 !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8316 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 8317 VEX_4V, VEX_L; 8318 def mr : AVX28I<0x8e, MRMDestMem, (outs), 8319 (ins i128mem:$dst, VR128:$src1, VR128:$src2), 8320 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8321 [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; 8322 def Ymr : AVX28I<0x8e, MRMDestMem, (outs), 8323 (ins i256mem:$dst, VR256:$src1, VR256:$src2), 8324 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8325 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; 8326} 8327 8328defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", 8329 int_x86_avx2_maskload_d, 8330 int_x86_avx2_maskload_d_256, 8331 int_x86_avx2_maskstore_d, 8332 int_x86_avx2_maskstore_d_256>; 8333defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", 8334 int_x86_avx2_maskload_q, 8335 int_x86_avx2_maskload_q_256, 8336 int_x86_avx2_maskstore_q, 8337 int_x86_avx2_maskstore_q_256>, VEX_W; 8338 8339 8340//===----------------------------------------------------------------------===// 8341// Variable Bit Shifts 8342// 8343multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 8344 ValueType vt128, ValueType vt256> { 8345 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), 8346 (ins VR128:$src1, VR128:$src2), 8347 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8348 [(set VR128:$dst, 8349 (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, 8350 VEX_4V; 8351 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), 8352 (ins VR128:$src1, i128mem:$src2), 8353 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8354 [(set VR128:$dst, 8355 (vt128 (OpNode VR128:$src1, 8356 (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, 8357 VEX_4V; 8358 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 8359 (ins VR256:$src1, VR256:$src2), 8360 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8361 [(set VR256:$dst, 8362 (vt256 (OpNode 
VR256:$src1, (vt256 VR256:$src2))))]>, 8363 VEX_4V, VEX_L; 8364 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 8365 (ins VR256:$src1, i256mem:$src2), 8366 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8367 [(set VR256:$dst, 8368 (vt256 (OpNode VR256:$src1, 8369 (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>, 8370 VEX_4V, VEX_L; 8371} 8372 8373defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; 8374defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; 8375defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; 8376defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; 8377defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; 8378 8379//===----------------------------------------------------------------------===// 8380// VGATHER - GATHER Operations 8381multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, 8382 X86MemOperand memop128, X86MemOperand memop256> { 8383 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb), 8384 (ins VR128:$src1, memop128:$src2, VR128:$mask), 8385 !strconcat(OpcodeStr, 8386 "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), 8387 []>, VEX_4VOp3; 8388 def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb), 8389 (ins RC256:$src1, memop256:$src2, RC256:$mask), 8390 !strconcat(OpcodeStr, 8391 "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), 8392 []>, VEX_4VOp3, VEX_L; 8393} 8394 8395let mayLoad = 1, Constraints 8396 = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb" 8397 in { 8398 defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; 8399 defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; 8400 defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; 8401 defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; 8402 defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, 
vx64mem>, VEX_W; 8403 defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W; 8404 defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>; 8405 defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>; 8406} 8407