//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//-----------------------------------
// Vector Specific
//-----------------------------------

//
// All vector instructions derive from NVPTXVecInst
//
// NVPTXVecInst forwards outs/ins/asmstr/pattern to NVPTXInst unchanged and
// additionally stores the instruction passed as sInst (NOP when omitted) in
// the scalarInst field.
//

class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                   NVPTXInst sInst=NOP>
    : NVPTXInst<outs, ins, asmstr, pattern> {
  NVPTXInst scalarInst=sInst;
}

//
// Element extraction: one mov per def, selecting the element of $src named
// by the immediate $c through the ${c:vecelem} operand modifier. The i8
// variants print mov.u16, the same mnemonic as the i16 variants.
//
let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
// Extract v2i16
def V2i16Extract : NVPTXVecInst<
    (outs Int16Regs:$dst),
    (ins V2I16Regs:$src, i8imm:$c),
    "mov.u16 \t$dst, $src${c:vecelem};",
    [(set Int16Regs:$dst, (vector_extract
        (v2i16 V2I16Regs:$src), imm:$c))],
    IMOV16rr>;

// Extract v4i16
def V4i16Extract : NVPTXVecInst<
    (outs Int16Regs:$dst),
    (ins V4I16Regs:$src, i8imm:$c),
    "mov.u16 \t$dst, $src${c:vecelem};",
    [(set Int16Regs:$dst, (vector_extract
        (v4i16 V4I16Regs:$src), imm:$c))],
    IMOV16rr>;

// Extract v2i8
def V2i8Extract : NVPTXVecInst<
    (outs Int8Regs:$dst),
    (ins V2I8Regs:$src, i8imm:$c),
    "mov.u16 \t$dst, $src${c:vecelem};",
    [(set Int8Regs:$dst, (vector_extract
        (v2i8 V2I8Regs:$src), imm:$c))],
    IMOV8rr>;

// Extract v4i8
def V4i8Extract : NVPTXVecInst<
    (outs Int8Regs:$dst),
    (ins V4I8Regs:$src, i8imm:$c),
    "mov.u16 \t$dst, $src${c:vecelem};",
    [(set Int8Regs:$dst, (vector_extract
        (v4i8 V4I8Regs:$src), imm:$c))],
    IMOV8rr>;

// Extract v2i32
def V2i32Extract : NVPTXVecInst<
    (outs Int32Regs:$dst),
    (ins V2I32Regs:$src, i8imm:$c),
    "mov.u32 \t$dst, $src${c:vecelem};",
    [(set Int32Regs:$dst, (vector_extract
        (v2i32 V2I32Regs:$src), imm:$c))],
    IMOV32rr>;

// Extract v2f32
def V2f32Extract : NVPTXVecInst<
    (outs Float32Regs:$dst),
    (ins V2F32Regs:$src, i8imm:$c),
    "mov.f32 \t$dst, $src${c:vecelem};",
    [(set Float32Regs:$dst, (vector_extract
        (v2f32 V2F32Regs:$src), imm:$c))],
    FMOV32rr>;

// Extract v2i64
def V2i64Extract : NVPTXVecInst<
    (outs Int64Regs:$dst),
    (ins V2I64Regs:$src, i8imm:$c),
    "mov.u64 \t$dst, $src${c:vecelem};",
    [(set Int64Regs:$dst, (vector_extract
        (v2i64 V2I64Regs:$src), imm:$c))],
    IMOV64rr>;

// Extract v2f64
def V2f64Extract : NVPTXVecInst<
    (outs Float64Regs:$dst),
    (ins V2F64Regs:$src, i8imm:$c),
    "mov.f64 \t$dst, $src${c:vecelem};",
    [(set Float64Regs:$dst, (vector_extract
        (v2f64 V2F64Regs:$src), imm:$c))],
    FMOV64rr>;

// Extract v4i32
def V4i32Extract : NVPTXVecInst<
    (outs Int32Regs:$dst),
    (ins V4I32Regs:$src, i8imm:$c),
    "mov.u32 \t$dst, $src${c:vecelem};",
    [(set Int32Regs:$dst, (vector_extract
        (v4i32 V4I32Regs:$src), imm:$c))],
    IMOV32rr>;

// Extract v4f32
def V4f32Extract : NVPTXVecInst<
    (outs Float32Regs:$dst),
    (ins V4F32Regs:$src, i8imm:$c),
    "mov.f32 \t$dst, $src${c:vecelem};",
    [(set Float32Regs:$dst, (vector_extract
        (v4f32 V4F32Regs:$src), imm:$c))],
    FMOV32rr>;
}

//
// Element insertion: each def prints two instructions. The first copies the
// whole source vector into $dst (${...:vecfull}), the second overwrites the
// element of $dst named by $c with $val.
//
let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
// Insert v2i8
def V2i8Insert : NVPTXVecInst<
    (outs V2I8Regs:$dst),
    (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
    "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    [(set V2I8Regs:$dst,
        (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
    IMOV8rr>;

// Insert v4i8
def V4i8Insert : NVPTXVecInst<
    (outs V4I8Regs:$dst),
    (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
    "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    [(set V4I8Regs:$dst,
        (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
    IMOV8rr>;

// Insert v2i16
def V2i16Insert : NVPTXVecInst<
    (outs V2I16Regs:$dst),
    (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
    "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    [(set V2I16Regs:$dst,
        (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
    IMOV16rr>;

// Insert v4i16
def V4i16Insert : NVPTXVecInst<
    (outs V4I16Regs:$dst),
    (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
    "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    [(set V4I16Regs:$dst,
        (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
    IMOV16rr>;

// Insert v2i32
def V2i32Insert : NVPTXVecInst<
    (outs V2I32Regs:$dst),
    (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
    "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
    [(set V2I32Regs:$dst,
        (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
    IMOV32rr>;

// Insert v2f32
def V2f32Insert : NVPTXVecInst<
    (outs V2F32Regs:$dst),
    (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
    "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
    [(set V2F32Regs:$dst,
        (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
    FMOV32rr>;

// Insert v2i64
def V2i64Insert : NVPTXVecInst<
    (outs V2I64Regs:$dst),
    (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
    "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
    [(set V2I64Regs:$dst,
        (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
    IMOV64rr>;

// Insert v2f64
def V2f64Insert : NVPTXVecInst<
    (outs V2F64Regs:$dst),
    (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
    "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
    [(set V2F64Regs:$dst,
        (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
    FMOV64rr>;

// Insert v4i32
def V4i32Insert : NVPTXVecInst<
    (outs V4I32Regs:$dst),
    (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
    "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
    [(set V4I32Regs:$dst,
        (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
    IMOV32rr>;

// Insert v4f32
def V4f32Insert : NVPTXVecInst<
    (outs V4F32Regs:$dst),
    (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
    "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
    "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
    [(set V4F32Regs:$dst,
        (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
    FMOV32rr>;
}

//
// Assembly-string builders.
//
// BinOpAsmString is a plain holder: field s carries the finished string.
// The V2*/V4* subclasses repeat `opcode` once per vector element, suffixing
// every operand with _0 .. _1 (V2) or _0 .. _3 (V4) and separating the
// per-element instructions with "\n\t".
//
class BinOpAsmString<string c> {
  string s = c;
}

// Two-source form, four elements: "<opcode> dst_i, a_i, b_i;"
class V4AsmStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(!strconcat(
      !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
        opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
        opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;

// Two-source form, two elements.
class V2AsmStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;

// Three-source form, four elements: "<opcode> dst_i, a_i, b_i, c_i;"
class V4MADStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(!strconcat(
      !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
        opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
        opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;

// Three-source form, two elements.
class V2MADStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;

// One-source form, four elements: "<opcode> dst_i, a_i;"
class V4UnaryStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(!strconcat(
      !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1;\n\t"),
        opcode), " \t${dst}_2, ${a}_2;\n\t"),
        opcode), " \t${dst}_3, ${a}_3;")>;

// One-source form, two elements.
class V2UnaryStr<string opcode> : BinOpAsmString<
    !strconcat(!strconcat(!strconcat(
        opcode, " \t${dst}_0, ${a}_0;\n\t"),
        opcode), " \t${dst}_1, ${a}_1;")>;

//
// Instruction shapes built on NVPTXVecInst. Each one takes its assembly text
// from asmstr.s and forwards sInst as the associated scalar instruction.
//

// dst = OpNode(a, b), all three operands in the same register class.
class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
                  NVPTXInst sInst=NOP>
    : NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
                   asmstr.s,
                   [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
                   sInst>;

// dst = OpNode(a, b) where $b (regclass2) may differ in class from $dst/$a
// (regclass1).
class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2, NVPTXInst sInst=NOP>
    : NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
                   asmstr.s,
                   [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
                   sInst>;

// dst = OpNode(a); OpNode is a PatFrag here rather than an SDNode.
class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
                 NVPTXInst sInst=NOP>
    : NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
                   asmstr.s,
                   [(set regclass:$dst, (OpNode regclass:$a))],
                   sInst>;

//
// Integer binary op over every integer vector class. asmstr is the mnemonic
// without its width; the width is appended here. Note that the i8 vector
// variants append "16", exactly like the i16 variants.
//
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
                     NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
                          i64op>;
  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
                          i32op>;
  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
                          i32op>;
  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
                          i16op>;
  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
                          i16op>;
  def V4I8  : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
                          i8op>;
  def V2I8  : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
                          i8op>;
}

//
// Floating-point binary op over V2F64, V4F32 and V2F32. asmstr must end with
// the separator (e.g. "add."); the type suffix is appended here. The _ftz
// defs are additionally guarded by the doF32FTZ predicate.
//
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64=NOP, NVPTXInst f32=NOP,
                       NVPTXInst f32_ftz=NOP> {
  def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
                          V2F64Regs, f64>;
  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
  def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                          V4F32Regs, f32>;
  def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                          V2F32Regs, f32>;
}

//
// Integer unary op over every integer vector class; same width-suffix rule
// as IntBinVOp (i8 variants append "16").
//
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
                       NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
                         V2I64Regs, i64op>;
  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V4I32Regs, i32op>;
  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V2I32Regs, i32op>;
  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V4I16Regs, i16op>;
  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V2I16Regs, i16op>;
  def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V4I8Regs, i8op>;
  def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V2I8Regs, i8op>;
}


// Integer Arithmetic
let VecInstType=isVecOther.Value in {
defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

// Carry-producing / carry-consuming add and sub, 32-bit element vectors only.
def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
                             ADDCCi32rr>;
def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
                             ADDCCi32rr>;
def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
                             SUBCCi32rr>;
def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
                             SUBCCi32rr>;
def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
                              ADDCCCi32rr>;
def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
                              ADDCCCi32rr>;
def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
                              SUBCCCi32rr>;
def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
                              SUBCCCi32rr>;

// Left shifts. The shift-amount operand is always a vector of i32 with the
// same element count as the shifted value.
def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
                             SHLi64rr>;
def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
                             SHLi32rr>;
def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
                             SHLi32rr>;
def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
                             SHLi16rr>;
def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
                             SHLi16rr>;
def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
                            SHLi8rr>;
def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
                            SHLi8rr>;
}

// cvt to v*i32, helpers for shift
// CVTtoVeci32 carries no selection pattern of its own (empty list); it is
// only referenced from the result side of the Pat records below.
class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
                  NVPTXInst sInst=NOP>
    : NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;

// Builds "<op>\t<dest>, <src>;".
class VecCVTStrHelper<string op, string dest, string src> {
  string s=!strconcat(op, !strconcat("\t",
           !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
}

// Two per-element conversions ($d_0/$s_0, $d_1/$s_1) joined by "\n\t".
class Vec2CVTStr<string op> {
  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
           !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
}

// Four per-element conversions joined by "\n\t".
class Vec4CVTStr<string op> {
  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
           !strconcat("\n\t",
           !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
           !strconcat("\n\t",
           !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
           !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
}

let VecInstType=isVecOther.Value in {
def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
                                 Vec2CVTStr<"cvt.u32.u16">.s,
                                 Zint_extendext8to32>;
def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext16to32>;
def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
                                 Vec4CVTStr<"cvt.u32.u16">.s,
                                 Zint_extendext8to32>;
def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
                                  Vec4CVTStr<"cvt.u32.u16">.s,
                                  Zint_extendext16to32>;
def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u64">.s,
                                  TRUNC_64to32>;
}

// shl whose shift amount has the same vector type as the shifted value:
// convert the amount to a vector of i32 first, then use the ShiftL* def.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

let VecInstType=isVecOther.Value in {
// Arithmetic right shifts (sra).
def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
                              SRAi64rr>;
def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
                              SRAi32rr>;
def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
                              SRAi32rr>;
def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
                              SRAi16rr>;
def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
                              SRAi16rr>;
def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
                             SRAi8rr>;
def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
                             SRAi8rr>;

// Logical right shifts (srl).
def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
                              SRLi64rr>;
def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
                              SRLi32rr>;
def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
                              SRLi32rr>;
def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
                              SRLi16rr>;
def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
                              SRLi16rr>;
def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
                             SRLi8rr>;
def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
                             SRLi8rr>;

// Multiply, divide and remainder.
defm VMult   : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
                         MULTi8rr>;
defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
                         MULTHSi16rr,
                         MULTHSi8rr>;
defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
                         MULTHUi16rr,
                         MULTHUi8rr>;
defm VSDiv   : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
                         SDIVi8rr>;
defm VUDiv   : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
                         UDIVi8rr>;
defm VSRem   : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
                         SREMi8rr>;
defm VURem   : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
                         UREMi8rr>;
}

// sra / srl with a same-typed shift amount: same rewrite as for shl above.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

//
// Multiply-add: dst = an(mn(a, b), c). VMAD emits a V4 and a V2 def;
// VMADV2Only / VFMADV2Only emit only V2 and hard-code add/mul and fadd/fmul
// respectively. Every def is guarded by Requires<[Pred]>.
//
multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
                Predicate Pred> {
  def V4 : NVPTXVecInst<
      (outs regclassv4:$dst),
      (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
      V4MADStr<asmstr>.s,
      [(set regclassv4:$dst,
          (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
      sop>,
      Requires<[Pred]>;
  def V2 : NVPTXVecInst<
      (outs regclassv2:$dst),
      (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
      V2MADStr<asmstr>.s,
      [(set regclassv2:$dst,
          (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
      sop>,
      Requires<[Pred]>;
}

multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                      Predicate Pred> {
  def V2 : NVPTXVecInst<
      (outs regclass:$dst),
      (ins regclass:$a, regclass:$b, regclass:$c),
      V2MADStr<asmstr>.s,
      [(set regclass:$dst, (add
          (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
      Requires<[Pred]>;
}
multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                       Predicate Pred> {
  def V2 : NVPTXVecInst<
      (outs regclass:$dst),
      (ins regclass:$a, regclass:$b, regclass:$c),
      V2MADStr<asmstr>.s,
      [(set regclass:$dst, (fadd
          (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
      Requires<[Pred]>;
}

let VecInstType=isVecOther.Value in {
// Integer multiply-add; the i8 vectors use the same "mad.lo.s16" mnemonic as
// the i16 vectors.
defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
                   true>;
defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
                   true>;
defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

// Floating-point add / sub / mul.
defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

// Floating-point multiply-add, each variant gated by its own predicate.
defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                       FMAD32_ftzrrr, doFMADF32_ftz>;
defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                       FMA32_ftzrrr, doFMAF32_ftz>;
defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
                   doFMADF32>;
defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
                   doFMAF32>;
defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}

// Floating-point division. The "_prec" variants print div.rn and require
// reqPTX20; the remaining f32 variants print div.full.
let VecInstType=isVecOther.Value in {
def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
                                    FDIV32rr_prec_ftz>,
                        Requires<[doF32FTZ, reqPTX20]>;
def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
                                    FDIV32rr_prec_ftz>,
                        Requires<[doF32FTZ, reqPTX20]>;
def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
                                FDIV32rr_prec>, Requires<[reqPTX20]>;
def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
                                FDIV32rr_prec>, Requires<[reqPTX20]>;
def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
                               FDIV32rr_ftz>, Requires<[doF32FTZ]>;
def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
                               FDIV32rr_ftz>, Requires<[doF32FTZ]>;
def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
}

// fneg wrapped as a PatFrag so that it can be handed to VecUnaryOp, whose
// OpNode parameter is typed PatFrag.
def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;

let VecInstType=isVecOther.Value in {
def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
                               FNEGf32_ftz>, Requires<[doF32FTZ]>;
def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
                               FNEGf32_ftz>, Requires<[doF32FTZ]>;
def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;

// Logical Arithmetic
defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
defm VOr  : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}


//
// Contraction of fsub/fmul pairs into the multiply-add instruction Inst by
// negating one operand:
//   a - (b * c)  ->  Inst(neg(b), c, a)
//   (a * b) - c  ->  Inst(a, b, neg(c))
// One multiclass per vector type; every pattern is guarded by Pred.
//
multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
            (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
            (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
        Requires<[Pred]>;
}

defm V2FMAF32ext_ftz  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
defm V2FMAF32ext      : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
defm V2FMADF32ext     : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;

multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
            (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
            (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
        Requires<[Pred]>;
}

defm V4FMAF32ext_ftz  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
defm V4FMAF32ext      : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
defm V4FMADF32ext     : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;

multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
            (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
            (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
        Requires<[Pred]>;
}

defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;

//
// Shuffle assembly-string builders.
//
// VecModStr produces the operand reference
//   "${c<elem>:vecv<vecsize><extra><l>}"
// e.g. VecModStr<"4", "0", "comm", "1"> gives "${c0:vecv4comm1}".
// NOTE(review): the meaning of the "comm"/"pos" modifiers is defined by the
// operand printer, which is not part of this file -- confirm there.
//
class VecModStr<string vecsize, string elem, string extra, string l="">
{
  string t1 = !strconcat("${c", elem);
  string t2 = !strconcat(t1, ":vecv");
  string t3 = !strconcat(t2, vecsize);
  string t4 = !strconcat(t3, extra);
  string t5 = !strconcat(t4, l);
  string s  = !strconcat(t5, "}");
}

// Text for one destination element: two "mov.<type>" lines writing
// ${dst}_<elem>, the first reading from $src1 and the second from $src2. Each
// line is prefixed by the matching "comm1"/"comm2" modifier and each source
// is suffixed by the "pos" modifier.
class ShuffleOneLine<string vecsize, string elem, string type>
{
  string t1  = VecModStr<vecsize, elem, "comm", "1">.s;
  string t2  = !strconcat(t1, "mov.");
  string t3  = !strconcat(t2, type);
  string t4  = !strconcat(t3, " \t${dst}_");
  string t5  = !strconcat(t4, elem);
  string t6  = !strconcat(t5, ", $src1");
  string t7  = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
  string t8  = !strconcat(t7, ";\n\t");
  string t9  = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
  string t10 = !strconcat(t9, "mov.");
  string t11 = !strconcat(t10, type);
  string t12 = !strconcat(t11, " \t${dst}_");
  string t13 = !strconcat(t12, elem);
  string t14 = !strconcat(t13, ", $src2");
  string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
  string s   = !strconcat(t15, ";");
}

// ShuffleOneLine for elements 0..1 of a two-element vector.
class ShuffleAsmStr2<string type>
{
  string t1 = ShuffleOneLine<"2", "0", type>.s;
  string t2 = !strconcat(t1, "\n\t");
  string s  = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
}

// ShuffleOneLine for elements 0..3 of a four-element vector.
class ShuffleAsmStr4<string type>
{
  string t1 = ShuffleOneLine<"4", "0", type>.s;
  string t2 = !strconcat(t1, "\n\t");
  string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
  string t4 = !strconcat(t3, "\n\t");
  string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
  string t6 = !strconcat(t5, "\n\t");
  string s  = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
}

//
// Shuffle instructions. Each takes two source vectors plus one i8 immediate
// per destination element ($c0 ..). They have no selection pattern (empty
// list). The printed text starts with a "//Mov ..." comment line listing all
// operands, followed by the ShuffleAsmStr2/4 body.
//
let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
def VecShuffle_v4f32 : NVPTXVecInst<
    (outs V4F32Regs:$dst),
    (ins V4F32Regs:$src1, V4F32Regs:$src2,
         i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
               ShuffleAsmStr4<"f32">.s),
    [], FMOV32rr>;

def VecShuffle_v4i32 : NVPTXVecInst<
    (outs V4I32Regs:$dst),
    (ins V4I32Regs:$src1, V4I32Regs:$src2,
         i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
               ShuffleAsmStr4<"u32">.s),
    [], IMOV32rr>;

def VecShuffle_v4i16 : NVPTXVecInst<
    (outs V4I16Regs:$dst),
    (ins V4I16Regs:$src1, V4I16Regs:$src2,
         i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
               ShuffleAsmStr4<"u16">.s),
    [], IMOV16rr>;

def VecShuffle_v4i8 : NVPTXVecInst<
    (outs V4I8Regs:$dst),
    (ins V4I8Regs:$src1, V4I8Regs:$src2,
         i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
               ShuffleAsmStr4<"u16">.s),
    [], IMOV8rr>;

def VecShuffle_v2f32 : NVPTXVecInst<
    (outs V2F32Regs:$dst),
    (ins V2F32Regs:$src1, V2F32Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"f32">.s),
    [], FMOV32rr>;

def VecShuffle_v2i32 : NVPTXVecInst<
    (outs V2I32Regs:$dst),
    (ins V2I32Regs:$src1, V2I32Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"u32">.s),
    [], IMOV32rr>;

def VecShuffle_v2i8 : NVPTXVecInst<
    (outs V2I8Regs:$dst),
    (ins V2I8Regs:$src1, V2I8Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"u16">.s),
    [], IMOV8rr>;

def VecShuffle_v2i16 : NVPTXVecInst<
    (outs V2I16Regs:$dst),
    (ins V2I16Regs:$src1, V2I16Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"u16">.s),
    [], IMOV16rr>;

def VecShuffle_v2f64 : NVPTXVecInst<
    (outs V2F64Regs:$dst),
    (ins V2F64Regs:$src1, V2F64Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"f64">.s),
    [], FMOV64rr>;

def VecShuffle_v2i64 : NVPTXVecInst<
    (outs V2I64Regs:$dst),
    (ins V2I64Regs:$src1, V2I64Regs:$src2,
         i8imm:$c0, i8imm:$c1),
    !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
               ShuffleAsmStr2<"u64">.s),
    [], IMOV64rr>;
}

// Node transforms that read mask element 0..3 of a vector_shuffle node and
// return it as an i32 target constant.
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
}]>;
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
}]>;
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
}]>;
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
}]>;

// The spurious call is here to silence a compiler warning about N being
// unused.
def vec_shuf
  : PatFrag<(ops node:$lhs, node:$rhs),
            (vector_shuffle node:$lhs, node:$rhs),
            [{ N->getGluedNode(); return true; }]>;

// Select vector_shuffle for every vector type.  The matched node is bound to
// $op so the ShuffleMaskN transforms can read its mask elements and pass them
// as the immediate operands of the VecShuffle_* instruction.
def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// build_vector of two scalars of class `sclass` into a vector of class
// `vclass`.  `asmstr` is the opcode text; the operand text is appended here.
// `si` is forwarded as the scalar instruction of the NVPTXVecInst.
class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
      [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
      si>;

// Four-scalar counterpart of Build_Vector2.
class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
      [(set vclass:$dst,
            (build_vector sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4))],
      si>;

let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
  // Two-element vectors.
  def Build_Vector2_f32
    : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, FMOV32rr>;
  def Build_Vector2_f64
    : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, FMOV64rr>;

  def Build_Vector2_i32
    : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector2_i64
    : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, IMOV64rr>;
  def Build_Vector2_i16
    : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector2_i8
    : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, IMOV8rr>;

  // Four-element vectors.
  def Build_Vector4_f32
    : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

  def Build_Vector4_i32
    : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector4_i16
    : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector4_i8
    : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}

// Whole-vector register move within one register class.  No selection
// pattern is attached.
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src),
      !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
      [],
      sop>;

let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
    VecInstType=isVecOther.Value in {
  def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
  def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

  def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
  def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

  def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
  def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

  def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
  def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

  def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
  def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}

// Sub-vector extraction.
// A two-element half of a four-element vector is rebuilt from two scalar
// element extracts: index 0 takes elements 0 and 1, index 2 takes 2 and 3.
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;

// v4f32 -> v2f32
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
                             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
                             (V4f32Extract V4F32Regs:$src, 3))>;

// v4i32 -> v2i32
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
                             (V4i32Extract V4I32Regs:$src, 1))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
                             (V4i32Extract V4I32Regs:$src, 3))>;

// v4i16 -> v2i16
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
                             (V4i16Extract V4I16Regs:$src, 1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
                             (V4i16Extract V4I16Regs:$src, 3))>;

// v4i8 -> v2i8
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
                            (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
                            (V4i8Extract V4I8Regs:$src, 3))>;

// Select instructions

// Text of one per-element select:
//   "selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;"
class Select_OneLine<string type, string pos> {
  string opAndDst = !strconcat(!strconcat("selp.", type),
                               !strconcat(" \t${dst}_", pos));
  string firstSrc  = !strconcat(", ${src1}_", pos);
  string secondSrc = !strconcat(", ${src2}_", pos);
  string s = !strconcat(!strconcat(opAndDst, firstSrc),
                        !strconcat(secondSrc, ", $p;"));
}

// Select text for elements 0 and 1, joined by "\n\t".
class Select_Str2<string type> {
  string s = !strconcat(!strconcat(Select_OneLine<type, "0">.s, "\n\t"),
                        Select_OneLine<type, "1">.s);
}

// Select text for elements 0 through 3, joined by "\n\t".
class Select_Str4<string type> {
  string lowPair =
    !strconcat(!strconcat(Select_OneLine<type, "0">.s, "\n\t"),
               Select_OneLine<type, "1">.s);
  string highPair =
    !strconcat(!strconcat(Select_OneLine<type, "2">.s, "\n\t"),
               Select_OneLine<type, "3">.s);
  string s = !strconcat(!strconcat(lowPair, "\n\t"), highPair);
}

// Whole-vector select on a single i1 predicate $p: $dst takes $src1 or $src2.
// `asmstr` is used verbatim as the assembly string.
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
      asmstr,
      [(set vclass:$dst,
            (select Int1Regs:$p, vclass:$src1, vclass:$src2))],
      sop>;

let VecInstType=isVecOther.Value in {
  def V2I64_Select
    : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
  def V4I32_Select
    : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
  def V2I32_Select
    : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
  def V4I16_Select
    : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
  def V2I16_Select
    : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
  def V4I8_Select
    : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
  def V2I8_Select
    : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;

  def V2F64_Select
    : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
  def V4F32_Select
    : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
  def V2F32_Select
    : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}

// Comparison instructions

// setcc convenience fragments: one two-operand fragment per condition code.
// Ordered (SETO*) conditions.
def vsetoeq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>;
def vsetogt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGT)>;
def vsetoge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOGE)>;
def vsetolt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLT)>;
def vsetole
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOLE)>;
def vsetone
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETONE)>;
def vseto
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETO)>;
// SETU* conditions.
def vsetuo
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUO)>;
def vsetueq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUEQ)>;
def vsetugt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGT)>;
def vsetuge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUGE)>;
def vsetult
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULT)>;
def vsetule
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETULE)>;
def vsetune
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETUNE)>;
// Condition codes without an O/U prefix.
def vseteq
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETEQ)>;
def vsetgt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGT)>;
def vsetge
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETGE)>;
def vsetlt
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLT)>;
def vsetle
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETLE)>;
def vsetne
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETNE)>;

// Element-wise vector comparison `op` of two `inrclass` vectors producing an
// `outrclass` vector.  The assembly string is the literal "Unsupported".
// NOTE(review): presumably this instruction is replaced by the scalar
// instruction `sop` before printing -- confirm against the vector-splitting
// code.
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<
      (outs outrclass:$dst),
      (ins inrclass:$a, inrclass:$b),
      "Unsupported",
      [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
      sop>;

// One integer comparison for every integer vector type; the result type is
// the same as the operand type.  instN is the scalar compare for N-bit
// elements.
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8,
                           NVPTXInst inst16,
                           NVPTXInst inst32,
                           NVPTXInst inst64> {
  def V2I8  : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}

let VecInstType=isVecOther.Value in {
  defm VecSGT
    : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
                      ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT
    : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
                      ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
  defm VecSLT
    : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
                      ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT
    : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
                      ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
  defm VecSGE
    : Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
                      ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE
    : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
                      ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
  defm VecSLE
    : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
                      ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE
    : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
                      ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
  defm VecSEQ
    : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
                      ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ
    : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
                      ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
  defm VecSNE
    : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
                      ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE
    : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
                      ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}

// One floating-point comparison for every fp vector type.  The result is an
// integer vector with the same element count and element width as the
// operands (v2f32 -> v2i32, v4f32 -> v4i32, v2f64 -> v2i64).
multiclass FVec_Compare_All<PatFrag op,
                            NVPTXInst instf32,
                            NVPTXInst instf64> {
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}

let VecInstType=isVecOther.Value in {
  // Ordered comparisons.
  defm FVecGT
    : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
  defm FVecLT
    : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
  defm FVecGE
    : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
  defm FVecLE
    : FVec_Compare_All<vsetole, FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
  defm FVecEQ
    : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
  defm FVecNE
    : FVec_Compare_All<vsetone, FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

  // Unordered comparisons.
  defm FVecUGT
    : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
  defm FVecULT
    : FVec_Compare_All<vsetult, FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
  defm FVecUGE
    : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
  defm FVecULE
    : FVec_Compare_All<vsetule, FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
  defm FVecUEQ
    : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
  defm FVecUNE
    : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

  // SETO / SETUO tests.
  defm FVecNUM
    : FVec_Compare_All<vseto, FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
  defm FVecNAN
    : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}

// Scalar forms of the vector parameter/return-value accesses.  `opstr` is the
// type suffix appended to "ld.param" / "st.param".  None of them carries a
// selection pattern.

// Loads four scalars from [retval0+$b].  $a is an operand but does not appear
// in the assembly string.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
      (ins i32imm:$a, i32imm:$b),
      !strconcat("ld.param",
                 !strconcat(opstr,
                            "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];")),
      []>;

// Loads two scalars from [retval0+$b].  $a is an operand but does not appear
// in the assembly string.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2),
      (ins i32imm:$a, i32imm:$b),
      !strconcat("ld.param",
                 !strconcat(opstr, "\t{{$d1, $d2}}, [retval0+$b];")),
      []>;

// Stores four scalars to [param$a+$b].
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a, i32imm:$b),
      !strconcat("st.param",
                 !strconcat(opstr,
                            "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};")),
      []>;

// Stores two scalars to [param$a+$b].
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
      !strconcat("st.param",
                 !strconcat(opstr, "\t[param$a+$b], {{$s1, $s2}};")),
      []>;

// Stores four scalars to [func_retval+$a].
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a),
      !strconcat("st.param",
                 !strconcat(opstr,
                            "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};")),
      []>;

// Stores two scalars to [func_retval+$a].
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a),
      !strconcat("st.param",
                 !strconcat(opstr, "\t[func_retval+$a], {{$s1, $s2}};")),
      []>;

def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;

// Two-element scalar ld.param forms.  The destination register class matches
// the element width named in the opcode suffix, exactly as the
// StoreParamScalar2* and StoreRetvalScalar2* definitions below do.  (The I64,
// I16 and I8 variants used to name Int32Regs regardless of element width.)
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;

def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalar st.param forms for outgoing call parameters.
def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;

def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;

def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalar st.param forms for the function return value.
def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;

def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;

def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;

// Vector-register forms of LoadParam / StoreParam / StoreRetval.  These carry
// the selection patterns.  Their assembly strings are descriptive text rather
// than an opcode, and `opstr` is accepted but not referenced in the class
// body.  `sop` names the matching *Scalar* instruction and is forwarded as
// the scalar instruction of the NVPTXVecInst.
// NOTE(review): presumably these are rewritten to `sop` before emission --
// confirm against the vector-splitting code.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs regclass:$dst),
      (ins i32imm:$a, i32imm:$b),
      "loadparam : $dst <- [$a, $b]",
      [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
      sop>;

class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a, i32imm:$b),
      "storeparam : [$a, $b] <- $val",
      [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
      sop>;

class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a),
      "storeretval : retval[$a] <- $val",
      [(StoreRetval (i32 imm:$a), regclass:$val)],
      sop>;

let VecInstType=isVecLD.Value in {
  def LoadParamV4I32
    : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
  def LoadParamV4I16
    : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
  def LoadParamV4I8
    : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

  def LoadParamV2I64
    : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
  def LoadParamV2I32
    : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
  def LoadParamV2I16
    : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
  def LoadParamV2I8
    : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

  def LoadParamV4F32
    : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
  def LoadParamV2F32
    : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
  def LoadParamV2F64
    : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}

let VecInstType=isVecST.Value in {
  def StoreParamV4I32
    : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
  def StoreParamV4I16
    : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
  def StoreParamV4I8
    : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

  def StoreParamV2I64
    : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
  def StoreParamV2I32
    : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
  def StoreParamV2I16
    : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
  def StoreParamV2I8
    : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

  def StoreParamV4F32
    : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
  def StoreParamV2F32
    : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
  def StoreParamV2F64
    : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

  def StoreRetvalV4I32
    : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
  def StoreRetvalV4I16
    : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
  def StoreRetvalV4I8
    : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

  def StoreRetvalV2I64
    : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
  def StoreRetvalV2I32
    : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
  def StoreRetvalV2I16
    : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
  def StoreRetvalV2I8
    : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

  def StoreRetvalV4F32
    : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
  def StoreRetvalV2F32
    : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
  def StoreRetvalV2F64
    : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;

}


// Int vector to int scalar bit convert
// Each pattern extracts every element and hands them to the matching
// VxIytoIz packing instruction.
// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                     (V4i8Extract V4I8Regs:$s, 1),
                     (V4i8Extract V4I8Regs:$s, 2),
                     (V4i8Extract V4I8Regs:$s, 3))>;
// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                      (V4i16Extract V4I16Regs:$s, 1),
                      (V4i16Extract V4I16Regs:$s, 2),
                      (V4i16Extract V4I16Regs:$s, 3))>;
// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s, 0),
                     (V2i8Extract V2I8Regs:$s, 1))>;
// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                      (V2i16Extract V2I16Regs:$s, 1))>;
// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                      (V2i32Extract V2I32Regs:$s, 1))>;

// Int scalar to int vector bit convert
// The assembly string of these instructions is the literal "Error!"; the
// last operand names the scalar instruction.
let VecInstType=isVecDest.Value in {
  // i32 -> v4i8
  def VecI32toV4I8
    : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV4I8>;
  // i64 -> v4i16
  def VecI64toV4I16
    : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV4I16>;
  // i16 -> v2i8
  def VecI16toV2I8
    : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
                   "Error!",
                   [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                   I16toV2I8>;
  // i32 -> v2i16
  def VecI32toV2I16
    : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV2I16>;
  // i64 -> v2i32
  def VecI64toV2I32
    : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV2I32>;
}

// Int vector to int vector bit convert
// These go through a scalar of the same total width: pack the source
// elements, then split the scalar into the destination vector.
// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;
// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;
// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;
// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;
// v2i64 -> v4i32
// Each i64 element is split into a v2i32, whose two halves become two
// elements of the result.
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
// v4i32 -> v2i64
// Elements (0,1) and (2,3) are each packed into one i64.
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                        (V4i32Extract V4I32Regs:$s, 1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                        (V4i32Extract V4I32Regs:$s, 3)))>;

// Fp scalar to fp vector convert
// f64 -> v2f32
let VecInstType=isVecDest.Value in {
  def VecF64toV2F32
    : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
                   "Error!",
                   [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                   F64toV2F32>;
}

// Fp vector to fp scalar convert
// v2f32 -> f64
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
          (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                      (V2f32Extract V2F32Regs:$s, 1))>;

//-----------------------------------
// Fp scalar -> integer vector bitconvert
//
// The fp scalar is first moved to an integer scalar of the same width with
// BITCONVERT_*_F2I, then handed to the scalar-to-vector instruction.
//-----------------------------------

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$src)),
          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$src))>;

// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$src)),
          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$src))>;

// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$src)),
          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$src))>;

// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$src)),
          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$src))>;

//-----------------------------------
// Integer vector -> fp scalar bitconvert
//
// The vector elements are extracted and combined into an integer scalar,
// which BITCONVERT_*_I2F then turns into the fp scalar.
//-----------------------------------

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$src)),
          (BITCONVERT_32_I2F
            (V4I8toI32 (V4i8Extract V4I8Regs:$src, 0),
                       (V4i8Extract V4I8Regs:$src, 1),
                       (V4i8Extract V4I8Regs:$src, 2),
                       (V4i8Extract V4I8Regs:$src, 3)))>;

// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$src)),
          (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$src, 0),
                        (V4i16Extract V4I16Regs:$src, 1),
                        (V4i16Extract V4I16Regs:$src, 2),
                        (V4i16Extract V4I16Regs:$src, 3)))>;

// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$src)),
          (BITCONVERT_32_I2F
            (V2I16toI32 (V2i16Extract V2I16Regs:$src, 0),
                        (V2i16Extract V2I16Regs:$src, 1)))>;

// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$src)),
          (BITCONVERT_64_I2F
            (V2I32toI64 (V2i32Extract V2I32Regs:$src, 0),
                        (V2i32Extract V2I32Regs:$src, 1)))>;

//-----------------------------------
// Integer scalar -> fp vector bitconvert
//-----------------------------------

// i64 -> v2f32
def : Pat<(v2f32 (bitconvert Int64Regs:$src)),
          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$src))>;

//-----------------------------------
// Fp vector -> integer scalar bitconvert
//-----------------------------------

// v2f32 -> i64
def : Pat<(i64 (bitconvert V2F32Regs:$src)),
          (BITCONVERT_64_F2I
            (V2F32toF64 (V2f32Extract V2F32Regs:$src, 0),
                        (V2f32Extract V2F32Regs:$src, 1)))>;

//-----------------------------------
// Integer vector -> fp vector bitconvert
//
// The result vector is assembled element by element with Build_Vector*;
// each element is produced by a scalar BITCONVERT_*_I2F.
//-----------------------------------

// v2i64 -> v4f32
// Each i64 element is converted to a v2i32 first; its two i32 elements
// become two consecutive f32 elements of the result.
def : Pat<(v4f32 (bitconvert V2I64Regs:$src)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 0)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 0)), 1)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 1)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$src, 1)), 1)))>;

// v2i64 -> v2f64
def : Pat<(v2f64 (bitconvert V2I64Regs:$src)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$src, 0)),
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$src, 1)))>;

// v2i32 -> v2f32
def : Pat<(v2f32 (bitconvert V2I32Regs:$src)),
          (Build_Vector2_f32
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$src, 0)),
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$src, 1)))>;

// v4i32 -> v2f64
// Elements (0,1) feed the first f64 and elements (2,3) the second.
def : Pat<(v2f64 (bitconvert V4I32Regs:$src)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$src, 0),
                          (V4i32Extract V4I32Regs:$src, 1))),
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$src, 2),
                          (V4i32Extract V4I32Regs:$src, 3))))>;

// v4i32 -> v4f32
def : Pat<(v4f32 (bitconvert V4I32Regs:$src)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 0)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 1)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 2)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$src, 3)))>;

// v4i16 -> v2f32
def : Pat<(v2f32 (bitconvert V4I16Regs:$src)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F
              (V4I16toI64 (V4i16Extract V4I16Regs:$src, 0),
                          (V4i16Extract V4I16Regs:$src, 1),
                          (V4i16Extract V4I16Regs:$src, 2),
                          (V4i16Extract V4I16Regs:$src, 3))))>;

//-----------------------------------
// Fp vector -> integer vector bitconvert
//
// Mirror image of the section above, using BITCONVERT_*_F2I per element.
//-----------------------------------

// v4f32 -> v2i64
// Elements (0,1) feed the first i64 and elements (2,3) the second.
def : Pat<(v2i64 (bitconvert V4F32Regs:$src)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$src, 0),
                          (V4f32Extract V4F32Regs:$src, 1))),
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$src, 2),
                          (V4f32Extract V4F32Regs:$src, 3))))>;

// v2f64 -> v2i64
def : Pat<(v2i64 (bitconvert V2F64Regs:$src)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$src, 0)),
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$src, 1)))>;

// v2f32 -> v2i32
def : Pat<(v2i32 (bitconvert V2F32Regs:$src)),
          (Build_Vector2_i32
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$src, 0)),
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$src, 1)))>;

// v2f64 -> v4i32
// Each f64 element is converted to a v2f32 first; its two f32 elements
// become two consecutive i32 elements of the result.
def : Pat<(v4i32 (bitconvert V2F64Regs:$src)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 0)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 0)), 1)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 1)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$src, 1)), 1)))>;

// v4f32 -> v4i32
def : Pat<(v4i32 (bitconvert V4F32Regs:$src)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 0)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 1)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 2)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$src, 3)))>;

// v2f32 -> v4i16
def : Pat<(v4i16 (bitconvert V2F32Regs:$src)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I
              (V2F32toF64 (V2f32Extract V2F32Regs:$src, 0),
                          (V2f32Extract V2F32Regs:$src, 1))))>;