//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9239310Sdim
//-----------------------------------
// Vector Specific
//-----------------------------------
13239310Sdim
//
// NVPTXVecInst - common base class of every vector instruction in this file.
//
// outs/ins/asmstr/pattern are handed to NVPTXInst unchanged; `sInst` is kept
// in the extra `scalarInst` field and falls back to NOP when omitted.
// NOTE(review): scalarInst is presumably the scalar counterpart consulted when
// a vector operation is handled element by element -- confirm with its users.
//
class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                   NVPTXInst sInst = NOP>
    : NVPTXInst<outs, ins, asmstr, pattern> {
  NVPTXInst scalarInst = sInst;
}
23239310Sdim
// Element extraction.
//
// Each record matches (vector_extract <vec>, imm) and prints one scalar mov
// whose source is the vector register followed by the ${c:vecelem} modifier of
// the lane immediate. All records are tagged isVecExtract and marked as cheap
// as a move. The i8 variants print the 16-bit mnemonic (mov.u16).
let VecInstType = isVecExtract.Value, isAsCheapAsAMove = 1 in {
  // v2i16 -> i16
  def V2i16Extract
      : NVPTXVecInst<(outs Int16Regs:$dst),
                     (ins V2I16Regs:$src, i8imm:$c),
                     "mov.u16 \t$dst, $src${c:vecelem};",
                     [(set Int16Regs:$dst,
                           (vector_extract (v2i16 V2I16Regs:$src), imm:$c))],
                     IMOV16rr>;

  // v4i16 -> i16
  def V4i16Extract
      : NVPTXVecInst<(outs Int16Regs:$dst),
                     (ins V4I16Regs:$src, i8imm:$c),
                     "mov.u16 \t$dst, $src${c:vecelem};",
                     [(set Int16Regs:$dst,
                           (vector_extract (v4i16 V4I16Regs:$src), imm:$c))],
                     IMOV16rr>;

  // v2i8 -> i8
  def V2i8Extract
      : NVPTXVecInst<(outs Int8Regs:$dst),
                     (ins V2I8Regs:$src, i8imm:$c),
                     "mov.u16 \t$dst, $src${c:vecelem};",
                     [(set Int8Regs:$dst,
                           (vector_extract (v2i8 V2I8Regs:$src), imm:$c))],
                     IMOV8rr>;

  // v4i8 -> i8
  def V4i8Extract
      : NVPTXVecInst<(outs Int8Regs:$dst),
                     (ins V4I8Regs:$src, i8imm:$c),
                     "mov.u16 \t$dst, $src${c:vecelem};",
                     [(set Int8Regs:$dst,
                           (vector_extract (v4i8 V4I8Regs:$src), imm:$c))],
                     IMOV8rr>;

  // v2i32 -> i32
  def V2i32Extract
      : NVPTXVecInst<(outs Int32Regs:$dst),
                     (ins V2I32Regs:$src, i8imm:$c),
                     "mov.u32 \t$dst, $src${c:vecelem};",
                     [(set Int32Regs:$dst,
                           (vector_extract (v2i32 V2I32Regs:$src), imm:$c))],
                     IMOV32rr>;

  // v2f32 -> f32
  def V2f32Extract
      : NVPTXVecInst<(outs Float32Regs:$dst),
                     (ins V2F32Regs:$src, i8imm:$c),
                     "mov.f32 \t$dst, $src${c:vecelem};",
                     [(set Float32Regs:$dst,
                           (vector_extract (v2f32 V2F32Regs:$src), imm:$c))],
                     FMOV32rr>;

  // v2i64 -> i64
  def V2i64Extract
      : NVPTXVecInst<(outs Int64Regs:$dst),
                     (ins V2I64Regs:$src, i8imm:$c),
                     "mov.u64 \t$dst, $src${c:vecelem};",
                     [(set Int64Regs:$dst,
                           (vector_extract (v2i64 V2I64Regs:$src), imm:$c))],
                     IMOV64rr>;

  // v2f64 -> f64
  def V2f64Extract
      : NVPTXVecInst<(outs Float64Regs:$dst),
                     (ins V2F64Regs:$src, i8imm:$c),
                     "mov.f64 \t$dst, $src${c:vecelem};",
                     [(set Float64Regs:$dst,
                           (vector_extract (v2f64 V2F64Regs:$src), imm:$c))],
                     FMOV64rr>;

  // v4i32 -> i32
  def V4i32Extract
      : NVPTXVecInst<(outs Int32Regs:$dst),
                     (ins V4I32Regs:$src, i8imm:$c),
                     "mov.u32 \t$dst, $src${c:vecelem};",
                     [(set Int32Regs:$dst,
                           (vector_extract (v4i32 V4I32Regs:$src), imm:$c))],
                     IMOV32rr>;

  // v4f32 -> f32
  def V4f32Extract
      : NVPTXVecInst<(outs Float32Regs:$dst),
                     (ins V4F32Regs:$src, i8imm:$c),
                     "mov.f32 \t$dst, $src${c:vecelem};",
                     [(set Float32Regs:$dst,
                           (vector_extract (v4f32 V4F32Regs:$src), imm:$c))],
                     FMOV32rr>;
}
105239310Sdim
// Element insertion.
//
// Each record matches (vector_insert <vec>, <scalar>, imm) and prints two
// instructions: a whole-vector mov of $src into $dst (the ${...:vecfull}
// operand form), then a scalar mov of $val into the lane of $dst picked by
// ${c:vecelem}. All records are tagged isVecInsert and marked as cheap as a
// move. The i8 variants print the 16-bit mnemonics.
let VecInstType = isVecInsert.Value, isAsCheapAsAMove = 1 in {
  // i8 -> v2i8
  def V2i8Insert
      : NVPTXVecInst<(outs V2I8Regs:$dst),
                     (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
                     "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                     [(set V2I8Regs:$dst,
                           (vector_insert V2I8Regs:$src, Int8Regs:$val,
                                          imm:$c))],
                     IMOV8rr>;

  // i8 -> v4i8
  def V4i8Insert
      : NVPTXVecInst<(outs V4I8Regs:$dst),
                     (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
                     "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                     [(set V4I8Regs:$dst,
                           (vector_insert V4I8Regs:$src, Int8Regs:$val,
                                          imm:$c))],
                     IMOV8rr>;

  // i16 -> v2i16
  def V2i16Insert
      : NVPTXVecInst<(outs V2I16Regs:$dst),
                     (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
                     "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                     [(set V2I16Regs:$dst,
                           (vector_insert V2I16Regs:$src, Int16Regs:$val,
                                          imm:$c))],
                     IMOV16rr>;

  // i16 -> v4i16
  def V4i16Insert
      : NVPTXVecInst<(outs V4I16Regs:$dst),
                     (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
                     "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                     [(set V4I16Regs:$dst,
                           (vector_insert V4I16Regs:$src, Int16Regs:$val,
                                          imm:$c))],
                     IMOV16rr>;

  // i32 -> v2i32
  def V2i32Insert
      : NVPTXVecInst<(outs V2I32Regs:$dst),
                     (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
                     "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
                     [(set V2I32Regs:$dst,
                           (vector_insert V2I32Regs:$src, Int32Regs:$val,
                                          imm:$c))],
                     IMOV32rr>;

  // f32 -> v2f32
  def V2f32Insert
      : NVPTXVecInst<(outs V2F32Regs:$dst),
                     (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
                     "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
                     [(set V2F32Regs:$dst,
                           (vector_insert V2F32Regs:$src, Float32Regs:$val,
                                          imm:$c))],
                     FMOV32rr>;

  // i64 -> v2i64
  def V2i64Insert
      : NVPTXVecInst<(outs V2I64Regs:$dst),
                     (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
                     "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
                     [(set V2I64Regs:$dst,
                           (vector_insert V2I64Regs:$src, Int64Regs:$val,
                                          imm:$c))],
                     IMOV64rr>;

  // f64 -> v2f64
  def V2f64Insert
      : NVPTXVecInst<(outs V2F64Regs:$dst),
                     (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
                     "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
                     [(set V2F64Regs:$dst,
                           (vector_insert V2F64Regs:$src, Float64Regs:$val,
                                          imm:$c))],
                     FMOV64rr>;

  // i32 -> v4i32
  def V4i32Insert
      : NVPTXVecInst<(outs V4I32Regs:$dst),
                     (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
                     "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
                     [(set V4I32Regs:$dst,
                           (vector_insert V4I32Regs:$src, Int32Regs:$val,
                                          imm:$c))],
                     IMOV32rr>;

  // f32 -> v4f32
  def V4f32Insert
      : NVPTXVecInst<(outs V4F32Regs:$dst),
                     (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
                     "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
                     "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
                     [(set V4F32Regs:$dst,
                           (vector_insert V4F32Regs:$src, Float32Regs:$val,
                                          imm:$c))],
                     FMOV32rr>;
}
197239310Sdim
// BinOpAsmString - record that carries an assembly template in its `s` field.
// Despite the name, the MAD and unary string builders in this file derive
// from it as well.
class BinOpAsmString<string c> {
  string s = c;
}
201239310Sdim
// V4AsmStr - template for a four-lane binary operation: `opcode` is printed
// once per lane on the _0.._3 suffixed forms of $dst, $a and $b, with "\n\t"
// between consecutive lanes.
class V4AsmStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(
                  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
                  !strconcat(opcode, " \t${dst}_1, ${a}_1, ${b}_1;\n\t")),
              !strconcat(
                  !strconcat(opcode, " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
                  !strconcat(opcode, " \t${dst}_3, ${a}_3, ${b}_3;")))>;
209239310Sdim
// V2AsmStr - template for a two-lane binary operation: `opcode` is printed on
// the _0 and _1 suffixed forms of $dst, $a and $b, separated by "\n\t".
class V2AsmStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
              !strconcat(opcode, " \t${dst}_1, ${a}_1, ${b}_1;"))>;
214239310Sdim
// V4MADStr - template for a four-lane three-source operation: `opcode` is
// printed once per lane on the _0.._3 suffixed forms of $dst, $a, $b and $c,
// with "\n\t" between consecutive lanes.
class V4MADStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(
                  !strconcat(opcode,
                             " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
                  !strconcat(opcode,
                             " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t")),
              !strconcat(
                  !strconcat(opcode,
                             " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
                  !strconcat(opcode,
                             " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")))>;
222239310Sdim
// V2MADStr - template for a two-lane three-source operation: `opcode` is
// printed on the _0 and _1 suffixed forms of $dst, $a, $b and $c, separated
// by "\n\t".
class V2MADStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
              !strconcat(opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;"))>;
227239310Sdim
// V4UnaryStr - template for a four-lane unary operation: `opcode` is printed
// once per lane on the _0.._3 suffixed forms of $dst and $a, with "\n\t"
// between consecutive lanes.
class V4UnaryStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(
                  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t"),
                  !strconcat(opcode, " \t${dst}_1, ${a}_1;\n\t")),
              !strconcat(
                  !strconcat(opcode, " \t${dst}_2, ${a}_2;\n\t"),
                  !strconcat(opcode, " \t${dst}_3, ${a}_3;")))>;
235239310Sdim
// V2UnaryStr - template for a two-lane unary operation: `opcode` is printed on
// the _0 and _1 suffixed forms of $dst and $a, separated by "\n\t".
class V2UnaryStr<string opcode>
    : BinOpAsmString<
          !strconcat(
              !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t"),
              !strconcat(opcode, " \t${dst}_1, ${a}_1;"))>;
240239310Sdim
// VecBinaryOp - vector instruction with two sources and one result, all in
// `regclass`. Prints `asmstr.s` and is selected for (OpNode $a, $b); `sInst`
// is forwarded to NVPTXVecInst (NOP when omitted).
class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
                  NVPTXInst sInst = NOP>
    : NVPTXVecInst<(outs regclass:$dst),
                   (ins regclass:$a, regclass:$b),
                   asmstr.s,
                   [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
                   sInst>;
247239310Sdim
// VecShiftOp - like a binary vector operation, except that the second source
// ($b) lives in its own register class `regclass2`, while the first source
// and the result use `regclass1`. Selected for (OpNode $a, $b).
class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2, NVPTXInst sInst = NOP>
    : NVPTXVecInst<(outs regclass1:$dst),
                   (ins regclass1:$a, regclass2:$b),
                   asmstr.s,
                   [(set regclass1:$dst,
                         (OpNode regclass1:$a, regclass2:$b))],
                   sInst>;
254239310Sdim
// VecUnaryOp - vector instruction with one source and one result, both in
// `regclass`. Unlike the binary form, the operator is given as a PatFrag.
// Selected for (OpNode $a).
class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
                 NVPTXInst sInst = NOP>
    : NVPTXVecInst<(outs regclass:$dst),
                   (ins regclass:$a),
                   asmstr.s,
                   [(set regclass:$dst, (OpNode regclass:$a))],
                   sInst>;
260239310Sdim
// IntBinVOp - expands one integer binary operator into all seven integer
// vector shapes. `asmstr` is the mnemonic without its width; the width suffix
// is appended here. The i8 shapes are given the "16" suffix, the same as the
// i16 shapes. The i64/i32/i16/i8 scalar instructions are forwarded as the
// scalar counterpart of the matching shapes.
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                     NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  // 64-bit lanes
  def V2I64
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
                    i64op>;

  // 32-bit lanes
  def V4I32
      : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
                    i32op>;
  def V2I32
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
                    i32op>;

  // 16-bit lanes
  def V4I16
      : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
                    i16op>;
  def V2I16
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
                    i16op>;

  // 8-bit lanes (printed with the 16-bit suffix)
  def V4I8
      : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
                    i8op>;
  def V2I8
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
                    i8op>;
}
279239310Sdim
// FloatBinVOp - expands one floating-point binary operator into v2f64, v4f32
// and v2f32 forms. `asmstr` is the mnemonic up to and including the trailing
// dot; the type suffix ("f64", "f32" or "ftz.f32") is appended here. The
// _ftz records are guarded by doF32FTZ and are defined before the plain f32
// records; that order is kept as-is.
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64 = NOP, NVPTXInst f32 = NOP,
                       NVPTXInst f32_ftz = NOP> {
  def V2F64
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode, V2F64Regs,
                    f64>;

  // Flush-to-zero f32 forms.
  def V4F32_ftz
      : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                    V4F32Regs, f32_ftz>,
        Requires<[doF32FTZ]>;
  def V2F32_ftz
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                    V2F32Regs, f32_ftz>,
        Requires<[doF32FTZ]>;

  // Plain f32 forms.
  def V4F32
      : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode, V4F32Regs,
                    f32>;
  def V2F32
      : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode, V2F32Regs,
                    f32>;
}
294239310Sdim
// IntUnaryVOp - expands one integer unary operator (given as a PatFrag) into
// all seven integer vector shapes. `asmstr` is the mnemonic without its
// width; the width suffix is appended here, and the i8 shapes are given the
// "16" suffix, the same as the i16 shapes.
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                       NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  // 64-bit lanes
  def V2I64
      : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
                   i64op>;

  // 32-bit lanes
  def V4I32
      : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
                   i32op>;
  def V2I32
      : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
                   i32op>;

  // 16-bit lanes
  def V4I16
      : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
                   i16op>;
  def V2I16
      : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
                   i16op>;

  // 8-bit lanes (printed with the 16-bit suffix)
  def V4I8
      : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
                   i8op>;
  def V2I8
      : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
                   i8op>;
}
313239310Sdim
314239310Sdim
// Integer arithmetic: add/sub for every integer shape, the carry-producing
// and carry-consuming add/sub forms for 32-bit lanes, and left shifts.
let VecInstType = isVecOther.Value in {
  defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
  defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

  // addc / subc nodes.
  def AddCCV4I32
      : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs, ADDCCi32rr>;
  def AddCCV2I32
      : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs, ADDCCi32rr>;
  def SubCCV4I32
      : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs, SUBCCi32rr>;
  def SubCCV2I32
      : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs, SUBCCi32rr>;

  // adde / sube nodes.
  def AddCCCV4I32
      : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs, ADDCCCi32rr>;
  def AddCCCV2I32
      : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs, ADDCCCi32rr>;
  def SubCCCV4I32
      : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs, SUBCCCi32rr>;
  def SubCCCV2I32
      : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs, SUBCCCi32rr>;

  // Left shifts. The shift amount is always taken from a vNi32 register
  // class; the i8 shapes print the 16-bit mnemonic.
  def ShiftLV2I64
      : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs, SHLi64rr>;
  def ShiftLV2I32
      : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs, SHLi32rr>;
  def ShiftLV4I32
      : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs, SHLi32rr>;
  def ShiftLV2I16
      : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs, SHLi16rr>;
  def ShiftLV4I16
      : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs, SHLi16rr>;
  def ShiftLV2I8
      : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs, SHLi8rr>;
  def ShiftLV4I8
      : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs, SHLi8rr>;
}
352239310Sdim
// CVTtoVeci32 - conversion of a vector in `inclass` to a vNi32 vector in
// `outclass`; a helper for the shift patterns. It carries no selection
// pattern of its own (the pattern list is empty), so it is only reachable
// through explicit Pat<> results.
class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
                  NVPTXInst sInst = NOP>
    : NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
357239310Sdim
// VecCVTStrHelper - builds one conversion line in field `s`:
//   <op> TAB <dest> ", " <src> ";"
class VecCVTStrHelper<string op, string dest, string src> {
  string s = !strconcat(
                 !strconcat(!strconcat(op, "\t"), !strconcat(dest, ", ")),
                 !strconcat(src, ";"));
}
362239310Sdim
// Vec2CVTStr - two conversion lines (lanes _0 and _1 of $d / $s) joined by
// "\n\t", exposed in field `s`.
class Vec2CVTStr<string op> {
  string s = !strconcat(
                 !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, "\n\t"),
                 VecCVTStrHelper<op, "${d}_1", "${s}_1">.s);
}
367239310Sdim
// Vec4CVTStr - four conversion lines (lanes _0.._3 of $d / $s) joined by
// "\n\t", exposed in field `s`.
class Vec4CVTStr<string op> {
  string s = !strconcat(
                 !strconcat(
                     !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
                                "\n\t"),
                     !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
                                "\n\t")),
                 !strconcat(
                     !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
                                "\n\t"),
                     VecCVTStrHelper<op, "${d}_3", "${s}_3">.s));
}
376239310Sdim
// Concrete conversions to vNi32. The i8 and i16 sources both print
// cvt.u32.u16; the v2i64 source prints cvt.u32.u64.
let VecInstType = isVecOther.Value in {
  def CVTv2i8tov2i32
      : CVTtoVeci32<V2I8Regs, V2I32Regs,
                    Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
  def CVTv2i16tov2i32
      : CVTtoVeci32<V2I16Regs, V2I32Regs,
                    Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
  def CVTv4i8tov4i32
      : CVTtoVeci32<V4I8Regs, V4I32Regs,
                    Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
  def CVTv4i16tov4i32
      : CVTtoVeci32<V4I16Regs, V4I32Regs,
                    Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
  def CVTv2i64tov2i32
      : CVTtoVeci32<V2I64Regs, V2I32Regs,
                    Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
}
389239310Sdim
// shl where the shift amount has the same vector type as the shifted value:
// convert the amount to vNi32 first, then use the matching ShiftL record.

// Two-lane shapes.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1,
                       (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1,
                      (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1,
                       (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-lane shapes.
def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1,
                       (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1,
                      (CVTv4i8tov4i32 V4I8Regs:$src2))>;
401239310Sdim
// Right shifts (arithmetic and logical) followed by multiply, high multiply,
// divide and remainder for every integer shape.
let VecInstType = isVecOther.Value in {
  // Arithmetic right shift (sra). The shift amount is always taken from a
  // vNi32 register class; the i8 shapes print the 16-bit mnemonic.
  def ShiftRAV2I64
      : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs, SRAi64rr>;
  def ShiftRAV2I32
      : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs, SRAi32rr>;
  def ShiftRAV4I32
      : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs, SRAi32rr>;
  def ShiftRAV2I16
      : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs, SRAi16rr>;
  def ShiftRAV4I16
      : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs, SRAi16rr>;
  def ShiftRAV2I8
      : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs, SRAi8rr>;
  def ShiftRAV4I8
      : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs, SRAi8rr>;

  // Logical right shift (srl), same layout as above.
  def ShiftRLV2I64
      : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs, SRLi64rr>;
  def ShiftRLV2I32
      : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs, SRLi32rr>;
  def ShiftRLV4I32
      : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs, SRLi32rr>;
  def ShiftRLV2I16
      : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs, SRLi16rr>;
  def ShiftRLV4I16
      : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs, SRLi16rr>;
  def ShiftRLV2I8
      : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs, SRLi8rr>;
  def ShiftRLV4I8
      : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs, SRLi8rr>;

  // Multiply (low half, signed high half, unsigned high half).
  defm VMult
      : IntBinVOp<"mul.lo.s", mul,
                  MULTi64rr, MULTi32rr, MULTi16rr, MULTi8rr>;
  defm VMultHS
      : IntBinVOp<"mul.hi.s", mulhs,
                  MULTHSi64rr, MULTHSi32rr, MULTHSi16rr, MULTHSi8rr>;
  defm VMultHU
      : IntBinVOp<"mul.hi.u", mulhu,
                  MULTHUi64rr, MULTHUi32rr, MULTHUi16rr, MULTHUi8rr>;

  // Division and remainder, signed and unsigned.
  defm VSDiv
      : IntBinVOp<"div.s", sdiv,
                  SDIVi64rr, SDIVi32rr, SDIVi16rr, SDIVi8rr>;
  defm VUDiv
      : IntBinVOp<"div.u", udiv,
                  UDIVi64rr, UDIVi32rr, UDIVi16rr, UDIVi8rr>;
  defm VSRem
      : IntBinVOp<"rem.s", srem,
                  SREMi64rr, SREMi32rr, SREMi16rr, SREMi8rr>;
  defm VURem
      : IntBinVOp<"rem.u", urem,
                  UREMi64rr, UREMi32rr, UREMi16rr, UREMi8rr>;
}
450239310Sdim
// sra / srl where the shift amount has the same vector type as the shifted
// value: convert the amount to vNi32 first, then use the matching ShiftRA /
// ShiftRL record.

// Arithmetic right shift, two-lane shapes.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Arithmetic right shift, four-lane shapes.
def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;

// Logical right shift, two-lane shapes.
def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Logical right shift, four-lane shapes.
def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;
474239310Sdim
// VMAD - multiply-add in four-lane (V4) and two-lane (V2) forms.
//
// Both records are selected for (an (mn $a, $b), $c), print `asmstr` once per
// lane with operands $dst, $a, $b, $c, and are guarded by `Pred`. `an` and
// `mn` default to the integer add and mul nodes. `Pred` has no default, so
// every use has to spell out all arguments.
multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an = add, SDNode mn = mul, NVPTXInst sop = NOP,
                Predicate Pred> {
  def V4
      : NVPTXVecInst<(outs regclassv4:$dst),
                     (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
                     V4MADStr<asmstr>.s,
                     [(set regclassv4:$dst,
                           (an (mn regclassv4:$a, regclassv4:$b),
                               regclassv4:$c))],
                     sop>,
        Requires<[Pred]>;

  def V2
      : NVPTXVecInst<(outs regclassv2:$dst),
                     (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
                     V2MADStr<asmstr>.s,
                     [(set regclassv2:$dst,
                           (an (mn regclassv2:$a, regclassv2:$b),
                               regclassv2:$c))],
                     sop>,
        Requires<[Pred]>;
}
494239310Sdim
// VMADV2Only - integer multiply-add that exists only in a two-lane form.
// Selected for (add (mul $a, $b), $c) and guarded by `Pred`.
multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass,
                      NVPTXInst sop = NOP, Predicate Pred> {
  def V2
      : NVPTXVecInst<(outs regclass:$dst),
                     (ins regclass:$a, regclass:$b, regclass:$c),
                     V2MADStr<asmstr>.s,
                     [(set regclass:$dst,
                           (add (mul regclass:$a, regclass:$b),
                                regclass:$c))],
                     sop>,
        Requires<[Pred]>;
}
// VFMADV2Only - floating-point multiply-add that exists only in a two-lane
// form. Selected for (fadd (fmul $a, $b), $c) and guarded by `Pred`.
multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass,
                       NVPTXInst sop = NOP, Predicate Pred> {
  def V2
      : NVPTXVecInst<(outs regclass:$dst),
                     (ins regclass:$a, regclass:$b, regclass:$c),
                     V2MADStr<asmstr>.s,
                     [(set regclass:$dst,
                           (fadd (fmul regclass:$a, regclass:$b),
                                 regclass:$c))],
                     sop>,
        Requires<[Pred]>;
}
513239310Sdim
// Integer multiply-add, integer negate, floating-point add/sub/mul and the
// floating-point mad/fma families.
let VecInstType = isVecOther.Value in {
  // Integer multiply-add; the i8 shapes print the 16-bit mnemonic. These are
  // guarded by the predicate record named `true`.
  defm I8MAD
      : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
  defm I16MAD
      : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, true>;
  defm I32MAD
      : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, true>;
  defm I64MAD
      : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

  // Integer negate.
  defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

  // Floating-point add / sub / mul.
  defm VAddf
      : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
  defm VSubf
      : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
  defm VMulf
      : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

  // Floating-point mad / fma; ftz forms are defined before the plain forms.
  defm F32MAD_ftz
      : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
             FMAD32_ftzrrr, doFMADF32_ftz>;
  defm F32FMA_ftz
      : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
             FMA32_ftzrrr, doFMAF32_ftz>;
  defm F32MAD
      : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul,
             FMAD32rrr, doFMADF32>;
  defm F32FMA
      : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul,
             FMA32rrr, doFMAF32>;
  defm F64FMA
      : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}
538239310Sdim
// Floating-point division. The div.rn f32 forms require reqPTX20 (plus
// doF32FTZ for the ftz spelling); they are followed by the div.full f32 forms
// and the f64 form. The definition order is kept unchanged.
let VecInstType = isVecOther.Value in {
  // div.rn, f32
  def V4F32Div_prec_ftz
      : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
                    FDIV32rr_prec_ftz>,
        Requires<[doF32FTZ, reqPTX20]>;
  def V2F32Div_prec_ftz
      : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
                    FDIV32rr_prec_ftz>,
        Requires<[doF32FTZ, reqPTX20]>;
  def V4F32Div_prec
      : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
                    FDIV32rr_prec>,
        Requires<[reqPTX20]>;
  def V2F32Div_prec
      : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
                    FDIV32rr_prec>,
        Requires<[reqPTX20]>;

  // div.full, f32
  def V2F32Div_ftz
      : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
                    FDIV32rr_ftz>,
        Requires<[doF32FTZ]>;
  def V4F32Div_ftz
      : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
                    FDIV32rr_ftz>,
        Requires<[doF32FTZ]>;
  def V2F32Div
      : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
  def V4F32Div
      : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;

  // f64
  def V2F64Div
      : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
}
556239310Sdim
// fnegpat - PatFrag wrapper around the fneg node, so that it can be passed to
// VecUnaryOp, whose operator parameter is a PatFrag.
def fnegpat : PatFrag<(ops node:$in),
                      (fneg node:$in)>;
558239310Sdim
// Floating-point negate followed by the bitwise logical operations.
let VecInstType = isVecOther.Value in {
  // Floating-point negate; ftz forms (guarded by doF32FTZ) come first.
  def VNegv2f32_ftz
      : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
                   FNEGf32_ftz>,
        Requires<[doF32FTZ]>;
  def VNegv4f32_ftz
      : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
                   FNEGf32_ftz>,
        Requires<[doF32FTZ]>;
  def VNegv2f32
      : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
  def VNegv4f32
      : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
  def VNegv2f64
      : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;

  // Logical Arithmetic
  defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
  defm VOr  : IntBinVOp<"or.b",  or,  ORb64rr,  ORb32rr,  ORb16rr,  ORb8rr>;
  defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

  defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}
575239310Sdim
576239310Sdim
577239310Sdimmulticlass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { // rewrites fsub-of-fmul on V2F32Regs into Inst, gated on Pred
578239310Sdim  def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), // matches a - (b * c)
579239310Sdim          (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c,  V2F32Regs:$a)>, // emits Inst(-b, c, a); presumably Inst computes op0 * op1 + op2
580239310Sdim          Requires<[Pred]>;
581239310Sdim
582239310Sdim  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), // matches (a * b) - c
583239310Sdim          (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, // emits Inst(a, b, -c)
584239310Sdim          Requires<[Pred]>;
585239310Sdim}
586239310Sdim
587239310Sdimdefm V2FMAF32ext_ftz  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>; // NOTE(review): the _ftz variants still negate with the non-ftz VNegv2f32 -- confirm intended
588239310Sdimdefm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
589239310Sdimdefm V2FMAF32ext  : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
590239310Sdimdefm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
591239310Sdim
592239310Sdimmulticlass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { // rewrites fsub-of-fmul on V4F32Regs into Inst, gated on Pred
593239310Sdim  def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), // matches a - (b * c)
594239310Sdim          (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c,  V4F32Regs:$a)>, // emits Inst(-b, c, a); presumably Inst computes op0 * op1 + op2
595239310Sdim          Requires<[Pred]>;
596239310Sdim
597239310Sdim  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), // matches (a * b) - c
598239310Sdim          (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, // emits Inst(a, b, -c)
599239310Sdim          Requires<[Pred]>;
600239310Sdim}
601239310Sdim
602239310Sdimdefm V4FMAF32ext_ftz  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>; // NOTE(review): the _ftz variants still negate with the non-ftz VNegv4f32 -- confirm intended
603239310Sdimdefm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
604239310Sdimdefm V4FMAF32ext  : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
605239310Sdimdefm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
606239310Sdim
607239310Sdimmulticlass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { // rewrites fsub-of-fmul on V2F64Regs into Inst, gated on Pred
608239310Sdim  def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), // matches a - (b * c)
609239310Sdim          (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, // emits Inst(-b, c, a); presumably Inst computes op0 * op1 + op2
610239310Sdim          Requires<[Pred]>;
611239310Sdim
612239310Sdim  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), // matches (a * b) - c
613239310Sdim          (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, // emits Inst(a, b, -c)
614239310Sdim          Requires<[Pred]>;
615239310Sdim}
616239310Sdim
617239310Sdimdefm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>; // the only instantiation visible here: F64FMAV2 under doFMAF64AGG
618239310Sdim
619239310Sdimclass VecModStr<string vecsize, string elem, string extra, string l=""> // builds the operand-modifier text "${c<elem>:vecv<vecsize><extra><l>}" in field s
620239310Sdim{
621239310Sdim  string t1 = !strconcat("${c", elem); // "${c<elem>"
622239310Sdim  string t2 = !strconcat(t1, ":vecv"); // "${c<elem>:vecv"
623239310Sdim  string t3 = !strconcat(t2, vecsize); // append the vector width, e.g. "2" or "4"
624239310Sdim  string t4 = !strconcat(t3, extra); // append the modifier kind; callers in this file pass "comm" or "pos"
625239310Sdim  string t5 = !strconcat(t4, l); // optional suffix, empty by default; callers pass "1" or "2" with "comm"
626239310Sdim  string s =  !strconcat(t5, "}"); // final result, e.g. "${c0:vecv4comm1}"
627239310Sdim}
628239310Sdimclass ShuffleOneLine<string vecsize, string elem, string type> // asm text for one destination lane <elem>: two mov lines, one per source
629239310Sdim{
630239310Sdim  string t1 = VecModStr<vecsize, elem, "comm", "1">.s; // first line starts with the comm1 modifier; presumably printed as a comment marker when src1 is not the chosen source -- TODO confirm in the asm printer
631239310Sdim  string t2 = !strconcat(t1, "mov.");
632239310Sdim  string t3 = !strconcat(t2, type); // "mov.<type>"
633239310Sdim  string t4 = !strconcat(t3, " \t${dst}_");
634239310Sdim  string t5 = !strconcat(t4, elem); // destination is written as ${dst}_<elem>
635239310Sdim  string t6 = !strconcat(t5, ", $src1");
636239310Sdim  string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s); // $src1 followed directly by the pos modifier for selector c<elem>
637239310Sdim  string t8 = !strconcat(t7, ";\n\t"); // end of the first line
638239310Sdim  string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s); // second line mirrors the first, with comm2 and $src2
639239310Sdim  string t10 = !strconcat(t9, "mov.");
640239310Sdim  string t11 = !strconcat(t10, type);
641239310Sdim  string t12 = !strconcat(t11, " \t${dst}_");
642239310Sdim  string t13 = !strconcat(t12, elem);
643239310Sdim  string t14 = !strconcat(t13, ", $src2");
644239310Sdim  string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
645239310Sdim  string s =   !strconcat(t15, ";"); // result: both lines, with no trailing newline
646239310Sdim}
647239310Sdimclass ShuffleAsmStr2<string type> // asm text for a 2-lane shuffle: ShuffleOneLine for lanes 0 and 1
648239310Sdim{
649239310Sdim  string t1 = ShuffleOneLine<"2", "0", type>.s; // lane 0
650239310Sdim  string t2 = !strconcat(t1, "\n\t"); // separator between lanes
651239310Sdim  string s  = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); // lane 1
652239310Sdim}
653239310Sdimclass ShuffleAsmStr4<string type> // asm text for a 4-lane shuffle: ShuffleOneLine for lanes 0..3
654239310Sdim{
655239310Sdim  string t1 = ShuffleOneLine<"4", "0", type>.s; // lane 0
656239310Sdim  string t2 = !strconcat(t1, "\n\t"); // separator between lanes
657239310Sdim  string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); // lane 1
658239310Sdim  string t4 = !strconcat(t3, "\n\t");
659239310Sdim  string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); // lane 2
660239310Sdim  string t6 = !strconcat(t5, "\n\t");
661239310Sdim  string s  = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); // lane 3
662239310Sdim}
663239310Sdim
664239310Sdimlet neverHasSideEffects=1, VecInstType=isVecShuffle.Value in { // shuffle instructions: two vector sources plus one i8imm selector per result lane
665239310Sdimdef VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), // 4-lane f32 shuffle
666239310Sdim                       (ins  V4F32Regs:$src1, V4F32Regs:$src2,
667239310Sdim                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), // $c0..$c3 are the per-lane selectors
668239310Sdim                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", // asm text opens with a "//Mov ..." line, then the per-lane movs
669239310Sdim                                 ShuffleAsmStr4<"f32">.s),
670239310Sdim                       [], FMOV32rr>; // empty pattern list: selected through the explicit vec_shuf Pat defs in this file
671239310Sdim
672239310Sdimdef VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), // 4-lane i32 shuffle, printed with mov.u32
673239310Sdim                       (ins  V4I32Regs:$src1, V4I32Regs:$src2,
674239310Sdim                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
675239310Sdim                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
676239310Sdim                                 ShuffleAsmStr4<"u32">.s),
677239310Sdim                       [], IMOV32rr>;
678239310Sdim
679239310Sdimdef VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), // 4-lane i16 shuffle, printed with mov.u16
680239310Sdim                       (ins  V4I16Regs:$src1, V4I16Regs:$src2,
681239310Sdim                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
682239310Sdim                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
683239310Sdim                                 ShuffleAsmStr4<"u16">.s),
684239310Sdim                       [], IMOV16rr>;
685239310Sdim
686239310Sdimdef VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), // 4-lane i8 shuffle; note it is printed with mov.u16, not a u8 form
687239310Sdim                       (ins  V4I8Regs:$src1, V4I8Regs:$src2,
688239310Sdim                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
689239310Sdim                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
690239310Sdim                                 ShuffleAsmStr4<"u16">.s),
691239310Sdim                       [], IMOV8rr>;
692239310Sdim
693239310Sdimdef VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), // 2-lane f32 shuffle
694239310Sdim                       (ins  V2F32Regs:$src1, V2F32Regs:$src2,
695239310Sdim                             i8imm:$c0, i8imm:$c1), // $c0, $c1 are the per-lane selectors
696239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
697239310Sdim                                 ShuffleAsmStr2<"f32">.s),
698239310Sdim                       [], FMOV32rr>;
699239310Sdim
700239310Sdimdef VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), // 2-lane i32 shuffle, printed with mov.u32
701239310Sdim                       (ins  V2I32Regs:$src1, V2I32Regs:$src2,
702239310Sdim                             i8imm:$c0, i8imm:$c1),
703239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
704239310Sdim                                 ShuffleAsmStr2<"u32">.s),
705239310Sdim                       [], IMOV32rr>;
706239310Sdim
707239310Sdimdef VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), // 2-lane i8 shuffle; printed with mov.u16
708239310Sdim                       (ins  V2I8Regs:$src1, V2I8Regs:$src2,
709239310Sdim                             i8imm:$c0, i8imm:$c1),
710239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
711239310Sdim                                 ShuffleAsmStr2<"u16">.s),
712239310Sdim                       [], IMOV8rr>;
713239310Sdim
714239310Sdimdef VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), // 2-lane i16 shuffle, printed with mov.u16
715239310Sdim                       (ins  V2I16Regs:$src1, V2I16Regs:$src2,
716239310Sdim                             i8imm:$c0, i8imm:$c1),
717239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
718239310Sdim                                 ShuffleAsmStr2<"u16">.s),
719239310Sdim                       [], IMOV16rr>;
720239310Sdim
721239310Sdimdef VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), // 2-lane f64 shuffle
722239310Sdim                       (ins  V2F64Regs:$src1, V2F64Regs:$src2,
723239310Sdim                             i8imm:$c0, i8imm:$c1),
724239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
725239310Sdim                                 ShuffleAsmStr2<"f64">.s),
726239310Sdim                       [], FMOV64rr>;
727239310Sdim
728239310Sdimdef VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), // 2-lane i64 shuffle, printed with mov.u64
729239310Sdim                       (ins  V2I64Regs:$src1, V2I64Regs:$src2,
730239310Sdim                             i8imm:$c0, i8imm:$c1),
731239310Sdim                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
732239310Sdim                                 ShuffleAsmStr2<"u64">.s),
733239310Sdim                       [], IMOV64rr>;
734239310Sdim}
735239310Sdim
736239310Sdimdef ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
737239310Sdim  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
738239310Sdim  return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
739239310Sdim}]>; // transform: mask element 0 of the shuffle node as an i32 target constant; NOTE(review): the consuming operands are declared i8imm
740239310Sdimdef ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
741239310Sdim  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
742239310Sdim  return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
743239310Sdim}]>; // same as ShuffleMask0, for mask element 1
744239310Sdimdef ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
745239310Sdim  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
746239310Sdim  return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
747239310Sdim}]>; // same as ShuffleMask0, for mask element 2 (used only by the 4-lane patterns in this file)
748239310Sdimdef ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
749239310Sdim  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
750239310Sdim  return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
751239310Sdim}]>; // same as ShuffleMask0, for mask element 3 (used only by the 4-lane patterns in this file)
752239310Sdim
753239310Sdim// vec_shuf matches every vector_shuffle: its predicate always returns true.
754239310Sdim// The N->getGluedNode() call only silences a compiler warning about N being unused.
755239310Sdimdef vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
756239310Sdim                       (vector_shuffle node:$lhs, node:$rhs),
757239310Sdim                       [{ N->getGluedNode(); return true; }]>; // the result of getGluedNode() is discarded
758239310Sdim
759239310Sdimdef : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), // v2f64 shuffle; $op names the shuffle node so the ShuffleMaskN transforms can read its mask
760239310Sdim          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
761239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; // mask elements 0..1 become the $c0, $c1 operands
762239310Sdim
763239310Sdimdef : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), // v4f32 shuffle
764239310Sdim          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
765239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
766239310Sdim                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; // mask elements 0..3 become $c0..$c3
767239310Sdim
768239310Sdimdef : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), // v2f32 shuffle
769239310Sdim          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
770239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
771239310Sdim
772239310Sdimdef : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), // v2i64 shuffle
773239310Sdim          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
774239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
775239310Sdim
776239310Sdimdef : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), // v4i32 shuffle
777239310Sdim          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
778239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
779239310Sdim                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
780239310Sdim
781239310Sdimdef : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), // v2i32 shuffle
782239310Sdim          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
783239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
784239310Sdim
785239310Sdimdef : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), // v4i16 shuffle
786239310Sdim          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
787239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
788239310Sdim                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
789239310Sdim
790239310Sdimdef : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), // v2i16 shuffle
791239310Sdim          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
792239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
793239310Sdim
794239310Sdimdef : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), // v4i8 shuffle
795239310Sdim          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
796239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
797239310Sdim                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
798239310Sdim
799239310Sdimdef : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), // v2i8 shuffle
800239310Sdim          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
801239310Sdim                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
802239310Sdim
803239310Sdimclass Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, // assembles a 2-element vector (vclass) from two scalars (sclass)
804239310Sdim  NVPTXInst si> // si is forwarded as the last NVPTXVecInst argument (its sInst / scalarInst)
805239310Sdim                   : NVPTXVecInst<(outs vclass:$dst),
806239310Sdim                   (ins  sclass:$a1, sclass:$a2),
807239310Sdim                   !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), // asm: <asmstr> TAB, $dst with the vecfull modifier, then the brace-wrapped scalar list
808239310Sdim                   [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], // selected for a two-operand build_vector
809239310Sdim                   si>;
810239310Sdimclass Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, // assembles a 4-element vector (vclass) from four scalars (sclass)
811239310Sdim  NVPTXInst si> // si is forwarded as the last NVPTXVecInst argument (its sInst / scalarInst)
812239310Sdim                   : NVPTXVecInst<(outs vclass:$dst),
813239310Sdim                   (ins  sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
814239310Sdim               !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), // asm: <asmstr> TAB, $dst with the vecfull modifier, then the brace-wrapped scalar list
815239310Sdim                   [(set vclass:$dst,
816239310Sdim                     (build_vector sclass:$a1, sclass:$a2, // selected for a four-operand build_vector
817239310Sdim                       sclass:$a3, sclass:$a4))], si>;
818239310Sdim
819239310Sdimlet isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { // every def in this scope is flagged as-cheap-as-a-move with VecInstType = isVecBuild.Value
820239310Sdimdef Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, // 2 x f32 from Float32Regs
821239310Sdim  FMOV32rr>;
822239310Sdimdef Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, // 2 x f64 from Float64Regs
823239310Sdim  FMOV64rr>;
824239310Sdim
825239310Sdimdef Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, // 2 x i32 from Int32Regs
826239310Sdim  IMOV32rr>;
827239310Sdimdef Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, // 2 x i64 from Int64Regs
828239310Sdim  IMOV64rr>;
829239310Sdimdef Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, // 2 x i16 from Int16Regs
830239310Sdim  IMOV16rr>;
831239310Sdimdef Build_Vector2_i8  : Build_Vector2<"mov.v2.u16",  V2I8Regs,  Int8Regs, // 2 x i8 from Int8Regs; note the u16 mnemonic
832239310Sdim  IMOV8rr>;
833239310Sdim
834239310Sdimdef Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, // 4 x f32 from Float32Regs
835239310Sdim  FMOV32rr>;
836239310Sdim
837239310Sdimdef Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, // 4 x i32 from Int32Regs
838239310Sdim  IMOV32rr>;
839239310Sdimdef Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, // 4 x i16 from Int16Regs
840239310Sdim  IMOV16rr>;
841239310Sdimdef Build_Vector4_i8  : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, // 4 x i8 from Int8Regs; note the u16 mnemonic
842239310Sdim  IMOV8rr>;
843239310Sdim}
844239310Sdim
845239310Sdimclass Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP> // whole-vector register copy within vclass; sop defaults to NOP
846239310Sdim                 : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
847239310Sdim                   !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"), // both operands are printed with the vecfull modifier
848239310Sdim                   [], sop>; // empty pattern list: no ISel pattern is attached here
849239310Sdim
850239310Sdimlet isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1, // flags applied to every move def in this scope
851239310Sdim  VecInstType=isVecOther.Value in {
852239310Sdimdef V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>; // f32 vector moves
853239310Sdimdef V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
854239310Sdim
855239310Sdimdef V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>; // i32 vector moves
856239310Sdimdef V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
857239310Sdim
858239310Sdimdef V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>; // i16 vector moves
859239310Sdimdef V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
860239310Sdim
861239310Sdimdef V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>; // i8 vector moves; note the u16 mnemonic
862239310Sdimdef V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
863239310Sdim
864239310Sdimdef V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>; // 64-bit element vector moves
865239310Sdimdef V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
866239310Sdim}
867239310Sdim
868239310Sdim// Subvector extraction: a local SDNode for ISD::EXTRACT_SUBVECTOR, used by the patterns that follow.
869239310Sdimdef extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
870239310Sdim                        SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>; // one result, two operands; value #2 (the second operand, the start index) must have pointer type
871239310Sdim
872239310Sdimdef : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)), // low half of a v4f32 (start index 0)
873239310Sdim                 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0), // rebuilt from elements 0 and 1
874239310Sdim                                    (V4f32Extract V4F32Regs:$src, 1))>;
875239310Sdimdef : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)), // high half of a v4f32 (start index 2)
876239310Sdim                 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2), // rebuilt from elements 2 and 3
877239310Sdim                                    (V4f32Extract V4F32Regs:$src, 3))>;
878239310Sdimdef : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)), // low half of a v4i32
879239310Sdim                 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
880239310Sdim                                    (V4i32Extract V4I32Regs:$src, 1))>;
881239310Sdimdef : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)), // high half of a v4i32
882239310Sdim                 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
883239310Sdim                                    (V4i32Extract V4I32Regs:$src, 3))>;
884239310Sdimdef : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)), // low half of a v4i16
885239310Sdim                 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
886239310Sdim                                    (V4i16Extract V4I16Regs:$src, 1))>;
887239310Sdimdef : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)), // high half of a v4i16
888239310Sdim                 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
889239310Sdim                                    (V4i16Extract V4I16Regs:$src, 3))>;
890239310Sdimdef : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)), // low half of a v4i8
891239310Sdim                 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
892239310Sdim                                    (V4i8Extract V4I8Regs:$src, 1))>;
893239310Sdimdef : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)), // high half of a v4i8
894239310Sdim                 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
895239310Sdim                                    (V4i8Extract V4I8Regs:$src, 3))>;
896239310Sdim
897239310Sdim// Select instructions: string helpers that emit one selp line per vector lane.
898239310Sdimclass Select_OneLine<string type, string pos> { // asm text for lane <pos>; result is in field s
899239310Sdim  string t1 = !strconcat("selp.", type); // "selp.<type>"
900239310Sdim  string t2 = !strconcat(t1, " \t${dst}_");
901239310Sdim  string t3 = !strconcat(t2, pos); // destination written as ${dst}_<pos>
902239310Sdim  string t4 = !strconcat(t3, ", ${src1}_");
903239310Sdim  string t5 = !strconcat(t4, pos); // first source written as ${src1}_<pos>
904239310Sdim  string t6 = !strconcat(t5, ", ${src2}_");
905239310Sdim  string t7 = !strconcat(t6, pos); // second source written as ${src2}_<pos>
906239310Sdim  string s  = !strconcat(t7, ", $p;"); // the same predicate operand $p is used for every lane
907239310Sdim}
908239310Sdim
909239310Sdimclass Select_Str2<string type> { // asm text for a 2-lane select: Select_OneLine for lanes 0 and 1
910239310Sdim  string t1 = Select_OneLine<type, "0">.s; // lane 0
911239310Sdim  string t2 = !strconcat(t1, "\n\t"); // separator between lanes
912239310Sdim  string s  = !strconcat(t2, Select_OneLine<type, "1">.s); // lane 1
913239310Sdim}
914239310Sdim
915239310Sdimclass Select_Str4<string type> { // asm text for a 4-lane select: Select_OneLine for lanes 0..3
916239310Sdim  string t1 = Select_OneLine<type, "0">.s; // lane 0
917239310Sdim  string t2 = !strconcat(t1, "\n\t"); // separator between lanes
918239310Sdim  string t3 = !strconcat(t2, Select_OneLine<type, "1">.s); // lane 1
919239310Sdim  string t4 = !strconcat(t3, "\n\t");
920239310Sdim  string t5 = !strconcat(t4, Select_OneLine<type, "2">.s); // lane 2
921239310Sdim  string t6 = !strconcat(t5, "\n\t");
922239310Sdim  string s  = !strconcat(t6, Select_OneLine<type, "3">.s); // lane 3
923239310Sdim
924239310Sdim}
925239310Sdim
926239310Sdimclass Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop> // whole-vector select driven by a single Int1Regs predicate
927239310Sdim      : NVPTXVecInst<(outs vclass:$dst),
928239310Sdim                     (ins  vclass:$src1, vclass:$src2, Int1Regs:$p),
929239310Sdim                     asmstr, // callers in this file pass Select_Str2<...>.s or Select_Str4<...>.s
930239310Sdim                     [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1, // matches select(p, src1, src2)
931239310Sdim                       vclass:$src2))],
932239310Sdim                     sop>; // sop is forwarded as the last NVPTXVecInst argument (its sInst / scalarInst)
933239310Sdim
934239310Sdimlet VecInstType=isVecOther.Value in { // every select def in this scope gets VecInstType = isVecOther.Value
935239310Sdimdef V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>; // integer vectors are printed with selp.b<width>
936239310Sdimdef V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
937239310Sdimdef V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
938239310Sdimdef V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
939239310Sdimdef V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
940239310Sdimdef V4I8_Select  : Vec_Select<V4I8Regs,  Select_Str4<"b16">.s, SELECTi8rr>; // i8 vectors are printed with selp.b16
941239310Sdimdef V2I8_Select  : Vec_Select<V2I8Regs,  Select_Str2<"b16">.s, SELECTi8rr>;
942239310Sdim
943239310Sdimdef V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>; // FP vectors are printed with selp.f<width>
944239310Sdimdef V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
945239310Sdimdef V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
946239310Sdim}
947239310Sdim
948239310Sdim// Comparison instructions
949239310Sdim
950239310Sdim// setcc convenience fragments: each vset<cc> is a two-operand setcc with its condition code fixed to SET<CC>.
951239310Sdimdef vsetoeq : PatFrag<(ops node:$lhs, node:$rhs), // SETO* condition codes
952239310Sdim                      (setcc node:$lhs, node:$rhs, SETOEQ)>;
953239310Sdimdef vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
954239310Sdim                      (setcc node:$lhs, node:$rhs, SETOGT)>;
955239310Sdimdef vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
956239310Sdim                      (setcc node:$lhs, node:$rhs, SETOGE)>;
957239310Sdimdef vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
958239310Sdim                      (setcc node:$lhs, node:$rhs, SETOLT)>;
959239310Sdimdef vsetole : PatFrag<(ops node:$lhs, node:$rhs),
960239310Sdim                      (setcc node:$lhs, node:$rhs, SETOLE)>;
961239310Sdimdef vsetone : PatFrag<(ops node:$lhs, node:$rhs),
962239310Sdim                      (setcc node:$lhs, node:$rhs, SETONE)>;
963239310Sdimdef vseto   : PatFrag<(ops node:$lhs, node:$rhs), // SETO and SETUO
964239310Sdim                      (setcc node:$lhs, node:$rhs, SETO)>;
965239310Sdimdef vsetuo  : PatFrag<(ops node:$lhs, node:$rhs),
966239310Sdim                      (setcc node:$lhs, node:$rhs, SETUO)>;
967239310Sdimdef vsetueq : PatFrag<(ops node:$lhs, node:$rhs), // SETU* condition codes; used by both the integer and FP compare defms in this file
968239310Sdim                      (setcc node:$lhs, node:$rhs, SETUEQ)>;
969239310Sdimdef vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
970239310Sdim                      (setcc node:$lhs, node:$rhs, SETUGT)>;
971239310Sdimdef vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
972239310Sdim                      (setcc node:$lhs, node:$rhs, SETUGE)>;
973239310Sdimdef vsetult : PatFrag<(ops node:$lhs, node:$rhs),
974239310Sdim                      (setcc node:$lhs, node:$rhs, SETULT)>;
975239310Sdimdef vsetule : PatFrag<(ops node:$lhs, node:$rhs),
976239310Sdim                      (setcc node:$lhs, node:$rhs, SETULE)>;
977239310Sdimdef vsetune : PatFrag<(ops node:$lhs, node:$rhs),
978239310Sdim                      (setcc node:$lhs, node:$rhs, SETUNE)>;
979239310Sdimdef vseteq  : PatFrag<(ops node:$lhs, node:$rhs), // condition codes with no O/U prefix
980239310Sdim                      (setcc node:$lhs, node:$rhs, SETEQ)>;
981239310Sdimdef vsetgt  : PatFrag<(ops node:$lhs, node:$rhs),
982239310Sdim                      (setcc node:$lhs, node:$rhs, SETGT)>;
983239310Sdimdef vsetge  : PatFrag<(ops node:$lhs, node:$rhs),
984239310Sdim                      (setcc node:$lhs, node:$rhs, SETGE)>;
985239310Sdimdef vsetlt  : PatFrag<(ops node:$lhs, node:$rhs),
986239310Sdim                      (setcc node:$lhs, node:$rhs, SETLT)>;
987239310Sdimdef vsetle  : PatFrag<(ops node:$lhs, node:$rhs),
988239310Sdim                      (setcc node:$lhs, node:$rhs, SETLE)>;
989239310Sdimdef vsetne  : PatFrag<(ops node:$lhs, node:$rhs),
990239310Sdim                      (setcc node:$lhs, node:$rhs, SETNE)>;
991239310Sdim
992239310Sdimclass Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass, // vector compare: two inrclass inputs, one outrclass result
993239310Sdim  NVPTXInst sop> // sop is forwarded as the last NVPTXVecInst argument (its sInst / scalarInst)
994239310Sdim    : NVPTXVecInst<(outs outrclass:$dst),
995239310Sdim                   (ins  inrclass:$a, inrclass:$b),
996239310Sdim                   "Unsupported", // literal asm text; NOTE(review): presumably rewritten via sop before printing -- confirm
997239310Sdim                   [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))], // selected when fragment op matches on (a, b)
998239310Sdim                   sop>;
999239310Sdim
1000239310Sdimmulticlass Vec_Compare_All<PatFrag op, // emits one Vec_Compare per integer vector type for condition fragment op
1001239310Sdim                           NVPTXInst inst8, // instruction passed as sop for the i8 vector forms
1002239310Sdim                           NVPTXInst inst16, // ... for the i16 vector forms
1003239310Sdim                           NVPTXInst inst32, // ... for the i32 vector forms
1004239310Sdim                           NVPTXInst inst64> // ... for the i64 vector form
1005239310Sdim{
1006239310Sdim  def  V2I8 : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>; // result register class equals the input register class in every def here
1007239310Sdim  def  V4I8 : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
1008239310Sdim  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
1009239310Sdim  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
1010239310Sdim  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
1011239310Sdim  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
1012239310Sdim  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
1013239310Sdim}
1014239310Sdim
1015239310Sdimlet VecInstType=isVecOther.Value in { // integer vector compares: one defm per condition, each covering all seven integer vector types
1016239310Sdim  defm VecSGT : Vec_Compare_All<vsetgt,  ISetSGTi8rr_toi8, ISetSGTi16rr_toi16, // VecS* defms use the unprefixed fragments (vsetgt, vsetlt, ...)
1017239310Sdim    ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
1018239310Sdim  defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16, // VecU* defms use the vsetu* fragments
1019239310Sdim    ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
1020239310Sdim  defm VecSLT : Vec_Compare_All<vsetlt,  ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
1021239310Sdim    ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
1022239310Sdim  defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
1023239310Sdim    ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
1024239310Sdim  defm VecSGE : Vec_Compare_All<vsetge,  ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
1025239310Sdim    ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
1026239310Sdim  defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
1027239310Sdim    ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
1028239310Sdim  defm VecSLE : Vec_Compare_All<vsetle,  ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
1029239310Sdim    ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
1030239310Sdim  defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
1031239310Sdim    ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
1032239310Sdim  defm VecSEQ : Vec_Compare_All<vseteq,  ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
1033239310Sdim    ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
1034239310Sdim  defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
1035239310Sdim    ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
1036239310Sdim  defm VecSNE : Vec_Compare_All<vsetne,  ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
1037239310Sdim    ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
1038239310Sdim  defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
1039239310Sdim    ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
1040239310Sdim}
1041239310Sdim
1042239310Sdimmulticlass FVec_Compare_All<PatFrag op, // emits one Vec_Compare per FP vector type for condition fragment op
1043239310Sdim                            NVPTXInst instf32, // instruction passed as sop for the f32 vector forms
1044239310Sdim                            NVPTXInst instf64> // ... for the f64 vector form
1045239310Sdim{
1046239310Sdim  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>; // FP inputs, integer vector result with the same lane count and width
1047239310Sdim  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
1048239310Sdim  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
1049239310Sdim}
1050239310Sdim
1051239310Sdimlet VecInstType=isVecOther.Value in { // FP vector compares: one defm per condition, each covering v2f32, v4f32 and v2f64
1052239310Sdim  defm FVecGT :  FVec_Compare_All<vsetogt, FSetGTf32rr_toi32, // first group: the vseto* fragments
1053239310Sdim    FSetGTf64rr_toi64>;
1054239310Sdim  defm FVecLT :  FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
1055239310Sdim    FSetLTf64rr_toi64>;
1056239310Sdim  defm FVecGE :  FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
1057239310Sdim    FSetGEf64rr_toi64>;
1058239310Sdim  defm FVecLE :  FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
1059239310Sdim    FSetLEf64rr_toi64>;
1060239310Sdim  defm FVecEQ :  FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
1061239310Sdim    FSetEQf64rr_toi64>;
1062239310Sdim  defm FVecNE :  FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
1063239310Sdim    FSetNEf64rr_toi64>;
1064239310Sdim
1065239310Sdim  defm FVecUGT :  FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32, // second group: the vsetu* fragments
1066239310Sdim    FSetUGTf64rr_toi64>;
1067239310Sdim  defm FVecULT :  FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
1068239310Sdim    FSetULTf64rr_toi64>;
1069239310Sdim  defm FVecUGE :  FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
1070239310Sdim    FSetUGEf64rr_toi64>;
1071239310Sdim  defm FVecULE :  FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
1072239310Sdim    FSetULEf64rr_toi64>;
1073239310Sdim  defm FVecUEQ :  FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
1074239310Sdim    FSetUEQf64rr_toi64>;
1075239310Sdim  defm FVecUNE :  FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
1076239310Sdim    FSetUNEf64rr_toi64>;
1077239310Sdim
1078239310Sdim  defm FVecNUM :  FVec_Compare_All<vseto,  FSetNUMf32rr_toi32, // third group: vseto (SETO) and vsetuo (SETUO)
1079239310Sdim    FSetNUMf64rr_toi64>;
1080239310Sdim  defm FVecNAN :  FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
1081239310Sdim    FSetNANf64rr_toi64>;
1082239310Sdim}
1083239310Sdim
1084239310Sdimclass LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> : // ld.param<opstr> of four scalars in regclass
1085239310Sdim      NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
1086239310Sdim                (ins i32imm:$a, i32imm:$b), // $a is declared but not referenced by the asm string below
1087239310Sdim                !strconcat(!strconcat("ld.param", opstr),
1088239310Sdim                  "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>; // address is the fixed symbol retval0 plus $b; no ISel pattern
1089239310Sdim
1090239310Sdimclass LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> : // ld.param<opstr> of two scalars in regclass
1091239310Sdim      NVPTXInst<(outs regclass:$d1, regclass:$d2),
1092239310Sdim                (ins i32imm:$a, i32imm:$b), // $a is declared but not referenced by the asm string below
1093239310Sdim                !strconcat(!strconcat("ld.param", opstr),
1094239310Sdim                  "\t{{$d1, $d2}}, [retval0+$b];"), []>; // address is the fixed symbol retval0 plus $b; no ISel pattern
1095239310Sdim
1096239310Sdim
1097239310Sdimclass StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> : // st.param<opstr> of four scalars in regclass
1098239310Sdim      NVPTXInst<(outs),
1099239310Sdim                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
1100239310Sdim                  i32imm:$a, i32imm:$b), // $a is printed right after "param" in the symbol name; $b is the added offset
1101239310Sdim                !strconcat(!strconcat("st.param", opstr),
1102239310Sdim                  "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>; // no ISel pattern
1103239310Sdim
1104239310Sdimclass StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> : // st.param<opstr> of two scalars in regclass
1105239310Sdim      NVPTXInst<(outs),
1106239310Sdim                (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b), // $a is printed right after "param" in the symbol name; $b is the added offset
1107239310Sdim                !strconcat(!strconcat("st.param", opstr),
1108239310Sdim                  "\t[param$a+$b], {{$s1, $s2}};"), []>; // no ISel pattern
1109239310Sdim
// Template for a four-source return-value store.
//   regclass - register class of each of the four sources ($s1..$s4).
//   opstr    - suffix appended to "st.param" to form the mnemonic.
// Has no results.  The single i32 immediate $a is the offset in
// [func_retval+$a].  The selection-pattern list is empty.
// NOTE(review): other parts of the NVPTX backend appear to name the return
// parameter "func_retval0"; confirm that "func_retval" (no trailing 0) is
// intended here.
1110239310Sdimclass StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
1111239310Sdim      NVPTXInst<(outs),
1112239310Sdim                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
1113239310Sdim                  i32imm:$a),
1114239310Sdim                !strconcat(!strconcat("st.param", opstr),
1115239310Sdim                  "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
1116239310Sdim
// Template for a two-source return-value store.
//   regclass - register class of each of the two sources ($s1, $s2).
//   opstr    - suffix appended to "st.param" to form the mnemonic.
// Has no results.  The single i32 immediate $a is the offset in
// [func_retval+$a].  The selection-pattern list is empty.
// NOTE(review): other parts of the NVPTX backend appear to name the return
// parameter "func_retval0"; confirm that "func_retval" (no trailing 0) is
// intended here.
1117239310Sdimclass StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
1118239310Sdim      NVPTXInst<(outs),
1119239310Sdim                (ins regclass:$s1, regclass:$s2, i32imm:$a),
1120239310Sdim                !strconcat(!strconcat("st.param", opstr),
1121239310Sdim                  "\t[func_retval+$a], {{$s1, $s2}};"), []>;
1122239310Sdim
// Concrete multi-result parameter loads, one per element type and width.
// The register class must match the element width named in the mnemonic
// suffix: .b8 -> Int8Regs, .b16 -> Int16Regs, .b32 -> Int32Regs,
// .b64 -> Int64Regs, .f32 -> Float32Regs, .f64 -> Float64Regs.
1123239310Sdimdef LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
1124239310Sdimdef LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
1125239310Sdimdef LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
1126239310Sdim
// The I64, I16 and I8 two-element variants previously used Int32Regs, which
// disagreed with their .b64/.b16/.b8 suffixes and with the corresponding
// StoreParamScalar2* / StoreRetvalScalar2* definitions.
1127239310Sdimdef LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
1128239310Sdimdef LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
1129239310Sdimdef LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
1130239310Sdimdef LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
1131239310Sdim
1132239310Sdimdef LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
1133239310Sdimdef LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
1134239310Sdimdef LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
1135239310Sdim
// Concrete multi-source parameter stores, one per element type and width.
// Each def pairs a register class with the matching "st.param" suffix
// (.v4/.v2 for the element count, .b8/.b16/.b32/.b64/.f32/.f64 for the type).
1136239310Sdimdef StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
1137239310Sdimdef StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
1138239310Sdimdef StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
1139239310Sdim
1140239310Sdimdef StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
1141239310Sdimdef StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
1142239310Sdimdef StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
1143239310Sdimdef StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
1144239310Sdim
1145239310Sdimdef StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
1146239310Sdimdef StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
1147239310Sdimdef StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
1148239310Sdim
// Concrete multi-source return-value stores, one per element type and width.
// Each def pairs a register class with the matching "st.param" suffix
// (.v4/.v2 for the element count, .b8/.b16/.b32/.b64/.f32/.f64 for the type).
1149239310Sdimdef StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
1150239310Sdimdef StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
1151239310Sdimdef StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
1152239310Sdim
1153239310Sdimdef StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
1154239310Sdimdef StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
1155239310Sdimdef StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
1156239310Sdimdef StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
1157239310Sdim
1158239310Sdimdef StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
1159239310Sdimdef StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
1160239310Sdimdef StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
1161239310Sdim
// Template for a vector-typed parameter load selected from a LoadParam node.
//   regclass - vector register class of the result ($dst).
//   opstr    - accepted but not used in this class body.
//   sop      - instruction forwarded to NVPTXVecInst as its scalar
//              counterpart (defaults to NOP).
// The assembly string is a fixed descriptive text rather than an "ld.param"
// mnemonic.
// NOTE(review): presumably this instruction is rewritten to `sop` before
// emission -- confirm against the pass that consumes scalarInst.
1162239310Sdimclass LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
1163239310Sdim      NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
1164239310Sdim                "loadparam : $dst <- [$a, $b]",
1165239310Sdim                [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
1166239310Sdim                sop>;
1167239310Sdim
// Template for a vector-typed parameter store selected from a StoreParam node.
//   regclass - vector register class of the stored value ($val).
//   opstr    - accepted but not used in this class body.
//   sop      - instruction forwarded to NVPTXVecInst as its scalar
//              counterpart (defaults to NOP).
// Note the operand order: the instruction takes ($val, $a, $b) while the
// matched node is (StoreParam $a, $b, $val).  The assembly string is a fixed
// descriptive text rather than an "st.param" mnemonic.
1168239310Sdimclass StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
1169239310Sdim      : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1170239310Sdim                "storeparam : [$a, $b] <- $val",
1171239310Sdim                [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
1172239310Sdim
// Template for a vector-typed return-value store selected from a StoreRetval
// node.
//   regclass - vector register class of the stored value ($val).
//   opstr    - accepted but not used in this class body.
//   sop      - instruction forwarded to NVPTXVecInst as its scalar
//              counterpart (defaults to NOP).
// Note the operand order: the instruction takes ($val, $a) while the matched
// node is (StoreRetval $a, $val).  The assembly string is a fixed descriptive
// text rather than an "st.param" mnemonic.
1173239310Sdimclass StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
1174239310Sdim  NVPTXInst sop=NOP>
1175239310Sdim      : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
1176239310Sdim                "storeretval : retval[$a] <- $val",
1177239310Sdim                [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
1178239310Sdim
// Vector parameter loads.  Every def in this block gets
// VecInstType = isVecLD.Value and names, as its third template argument, the
// LoadParamScalar* instruction with the same element type and count; that
// instruction becomes the def's scalarInst.
1179239310Sdimlet VecInstType=isVecLD.Value in {
1180239310Sdimdef LoadParamV4I32  : LoadParamVecInst<V4I32Regs, ".v4.b32",
1181239310Sdim  LoadParamScalar4I32>;
1182239310Sdimdef LoadParamV4I16  : LoadParamVecInst<V4I16Regs, ".v4.b16",
1183239310Sdim  LoadParamScalar4I16>;
1184239310Sdimdef LoadParamV4I8   : LoadParamVecInst<V4I8Regs, ".v4.b8",
1185239310Sdim  LoadParamScalar4I8>;
1186239310Sdim
1187239310Sdimdef LoadParamV2I64  : LoadParamVecInst<V2I64Regs, ".v2.b64",
1188239310Sdim  LoadParamScalar2I64>;
1189239310Sdimdef LoadParamV2I32  : LoadParamVecInst<V2I32Regs, ".v2.b32",
1190239310Sdim  LoadParamScalar2I32>;
1191239310Sdimdef LoadParamV2I16  : LoadParamVecInst<V2I16Regs, ".v2.b16",
1192239310Sdim  LoadParamScalar2I16>;
1193239310Sdimdef LoadParamV2I8   : LoadParamVecInst<V2I8Regs, ".v2.b8",
1194239310Sdim  LoadParamScalar2I8>;
1195239310Sdim
1196239310Sdimdef LoadParamV4F32  : LoadParamVecInst<V4F32Regs, ".v4.f32",
1197239310Sdim  LoadParamScalar4F32>;
1198239310Sdimdef LoadParamV2F32  : LoadParamVecInst<V2F32Regs, ".v2.f32",
1199239310Sdim  LoadParamScalar2F32>;
1200239310Sdimdef LoadParamV2F64  : LoadParamVecInst<V2F64Regs, ".v2.f64",
1201239310Sdim  LoadParamScalar2F64>;
1202239310Sdim}
1203239310Sdim
// Vector parameter stores and vector return-value stores.  Every def in this
// block gets VecInstType = isVecST.Value and names, as its third template
// argument, the StoreParamScalar* / StoreRetvalScalar* instruction with the
// same element type and count; that instruction becomes the def's scalarInst.
1204239310Sdimlet VecInstType=isVecST.Value in {
// Parameter stores.
1205239310Sdimdef StoreParamV4I32  : StoreParamVecInst<V4I32Regs, ".v4.b32",
1206239310Sdim  StoreParamScalar4I32>;
1207239310Sdimdef StoreParamV4I16  : StoreParamVecInst<V4I16Regs, ".v4.b16",
1208239310Sdim  StoreParamScalar4I16>;
1209239310Sdimdef StoreParamV4I8   : StoreParamVecInst<V4I8Regs, ".v4.b8",
1210239310Sdim  StoreParamScalar4I8>;
1211239310Sdim
1212239310Sdimdef StoreParamV2I64  : StoreParamVecInst<V2I64Regs, ".v2.b64",
1213239310Sdim  StoreParamScalar2I64>;
1214239310Sdimdef StoreParamV2I32  : StoreParamVecInst<V2I32Regs, ".v2.b32",
1215239310Sdim  StoreParamScalar2I32>;
1216239310Sdimdef StoreParamV2I16  : StoreParamVecInst<V2I16Regs, ".v2.b16",
1217239310Sdim  StoreParamScalar2I16>;
1218239310Sdimdef StoreParamV2I8   : StoreParamVecInst<V2I8Regs, ".v2.b8",
1219239310Sdim  StoreParamScalar2I8>;
1220239310Sdim
1221239310Sdimdef StoreParamV4F32  : StoreParamVecInst<V4F32Regs, ".v4.f32",
1222239310Sdim  StoreParamScalar4F32>;
1223239310Sdimdef StoreParamV2F32  : StoreParamVecInst<V2F32Regs, ".v2.f32",
1224239310Sdim  StoreParamScalar2F32>;
1225239310Sdimdef StoreParamV2F64  : StoreParamVecInst<V2F64Regs, ".v2.f64",
1226239310Sdim  StoreParamScalar2F64>;
1227239310Sdim
// Return-value stores.
1228239310Sdimdef StoreRetvalV4I32  : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
1229239310Sdim  StoreRetvalScalar4I32>;
1230239310Sdimdef StoreRetvalV4I16  : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
1231239310Sdim  StoreRetvalScalar4I16>;
1232239310Sdimdef StoreRetvalV4I8   : StoreRetvalVecInst<V4I8Regs,  ".v4.b8",
1233239310Sdim  StoreRetvalScalar4I8>;
1234239310Sdim
1235239310Sdimdef StoreRetvalV2I64  : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
1236239310Sdim  StoreRetvalScalar2I64>;
1237239310Sdimdef StoreRetvalV2I32  : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
1238239310Sdim  StoreRetvalScalar2I32>;
1239239310Sdimdef StoreRetvalV2I16  : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
1240239310Sdim  StoreRetvalScalar2I16>;
1241239310Sdimdef StoreRetvalV2I8   : StoreRetvalVecInst<V2I8Regs,  ".v2.b8",
1242239310Sdim  StoreRetvalScalar2I8>;
1243239310Sdim
1244239310Sdimdef StoreRetvalV4F32  : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
1245239310Sdim  StoreRetvalScalar4F32>;
1246239310Sdimdef StoreRetvalV2F32  : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
1247239310Sdim  StoreRetvalScalar2F32>;
1248239310Sdimdef StoreRetvalV2F64  : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
1249239310Sdim  StoreRetvalScalar2F64>;
1250239310Sdim
1251239310Sdim}
1252239310Sdim
1253239310Sdim
1254239310Sdim// Integer vector -> integer scalar bitconvert.
// Each pattern extracts every element of the source vector (indices in
// ascending order) and passes them, in that order, to the matching
// V<n>I<w>toI<m> instruction.
1255239310Sdim// v4i8 -> i32
1256239310Sdimdef : Pat<(i32 (bitconvert V4I8Regs:$s)),
1257239310Sdim          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1258239310Sdim                     (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
1259239310Sdim// v4i16 -> i64
1260239310Sdimdef : Pat<(i64 (bitconvert V4I16Regs:$s)),
1261239310Sdim          (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
1262239310Sdim            (V4i16Extract V4I16Regs:$s,1),
1263239310Sdim                     (V4i16Extract V4I16Regs:$s,2),
1264239310Sdim                     (V4i16Extract V4I16Regs:$s,3))>;
1265239310Sdim// v2i8 -> i16
1266239310Sdimdef : Pat<(i16 (bitconvert V2I8Regs:$s)),
1267239310Sdim          (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
1268239310Sdim// v2i16 -> i32
1269239310Sdimdef : Pat<(i32 (bitconvert V2I16Regs:$s)),
1270239310Sdim          (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
1271239310Sdim            (V2i16Extract V2I16Regs:$s,1))>;
1272239310Sdim// v2i32 -> i64
1273239310Sdimdef : Pat<(i64 (bitconvert V2I32Regs:$s)),
1274239310Sdim          (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
1275239310Sdim            (V2i32Extract V2I32Regs:$s,1))>;
1276239310Sdim
1277239310Sdim// Integer scalar -> integer vector bitconvert.
// Every def in this block gets VecInstType = isVecDest.Value, matches a
// bitconvert from a scalar integer register to a vector register, and names a
// scalar instruction (last template argument) that becomes its scalarInst.
// The assembly string is the literal "Error!".
// NOTE(review): presumably these are always replaced via scalarInst before
// printing, so "Error!" should never reach the output -- confirm.
1278239310Sdimlet VecInstType=isVecDest.Value in {
1279239310Sdim// i32 -> v4i8
1280239310Sdimdef VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
1281239310Sdim                                "Error!",
1282239310Sdim                                [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
1283239310Sdim                                I32toV4I8>;
1284239310Sdim// i64 -> v4i16
1285239310Sdimdef VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
1286239310Sdim                                 "Error!",
1287239310Sdim                                [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
1288239310Sdim                                 I64toV4I16>;
1289239310Sdim// i16 -> v2i8
1290239310Sdimdef VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
1291239310Sdim                                "Error!",
1292239310Sdim                               [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
1293239310Sdim                                I16toV2I8>;
1294239310Sdim// i32 -> v2i16
1295239310Sdimdef VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
1296239310Sdim                                 "Error!",
1297239310Sdim                                [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
1298239310Sdim                                 I32toV2I16>;
1299239310Sdim// i64 -> v2i32
1300239310Sdimdef VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
1301239310Sdim                                  "Error!",
1302239310Sdim                                [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
1303239310Sdim                                  I64toV2I32>;
1304239310Sdim}
1305239310Sdim
1306239310Sdim// Integer vector -> integer vector bitconvert.
// The first four patterns go through a scalar: the source elements are packed
// into one integer, which is then converted to the destination vector.  The
// last two patterns (v2i64 <-> v4i32) handle each 64-bit element separately
// and assemble the result with Build_Vector*.
1307239310Sdim// v4i8 -> v2i16
1308239310Sdimdef : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
1309239310Sdim          (VecI32toV2I16
1310239310Sdim          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1311239310Sdim                    (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
1312239310Sdim// v4i16 -> v2i32
1313239310Sdimdef : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
1314239310Sdim          (VecI64toV2I32
1315239310Sdim       (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
1316239310Sdim                (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
1317239310Sdim// v2i16 -> v4i8
1318239310Sdimdef : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
1319239310Sdim          (VecI32toV4I8
1320239310Sdim    (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
1321239310Sdim// v2i32 -> v4i16
1322239310Sdimdef : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
1323239310Sdim          (VecI64toV4I16
1324239310Sdim    (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
1325239310Sdim// v2i64 -> v4i32
// Each i64 element is converted to a v2i32, whose two halves become
// consecutive elements of the result.
1326239310Sdimdef : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
1327239310Sdim          (Build_Vector4_i32
1328239310Sdim            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
1329239310Sdim            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
1330239310Sdim            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
1331239310Sdim            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
1332239310Sdim// v4i32 -> v2i64
// Elements (0,1) and (2,3) are each packed into one i64.
1333239310Sdimdef : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
1334239310Sdim          (Build_Vector2_i64
1335239310Sdim      (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
1336239310Sdim    (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
1337239310Sdim
1338239310Sdim// Floating-point scalar -> floating-point vector bitconvert.
1339239310Sdim// f64 -> v2f32
// Gets VecInstType = isVecDest.Value, matches a bitconvert from Float64Regs
// to V2F32Regs, and names F64toV2F32 as its scalarInst.  The assembly string
// is the literal "Error!".
// NOTE(review): presumably replaced via scalarInst before printing -- confirm.
1340239310Sdimlet VecInstType=isVecDest.Value in {
1341239310Sdimdef VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
1342239310Sdim                                  "Error!",
1343239310Sdim                              [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
1344239310Sdim                                  F64toV2F32>;
1345239310Sdim}
1346239310Sdim
1347239310Sdim// Floating-point vector -> floating-point scalar bitconvert.
1348239310Sdim// v2f32 -> f64
// Both elements are extracted (index 0, then 1) and passed to V2F32toF64.
1349239310Sdimdef : Pat<(f64 (bitconvert V2F32Regs:$s)),
1350239310Sdim     (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
1351239310Sdim
1352239310Sdim// Floating-point scalar -> integer vector bitconvert.
// Two steps: the float is first reinterpreted as an integer of the same width
// (BITCONVERT_*_F2I), then that integer is converted to the vector.
1353239310Sdim// f32 -> v4i8
1354239310Sdimdef : Pat<(v4i8 (bitconvert Float32Regs:$s)),
1355239310Sdim          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
1356239310Sdim// f32 -> v2i16
1357239310Sdimdef : Pat<(v2i16 (bitconvert Float32Regs:$s)),
1358239310Sdim          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
1359239310Sdim// f64 -> v4i16
1360239310Sdimdef : Pat<(v4i16 (bitconvert Float64Regs:$s)),
1361239310Sdim          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
1362239310Sdim// f64 -> v2i32
1363239310Sdimdef : Pat<(v2i32 (bitconvert Float64Regs:$s)),
1364239310Sdim          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
1365239310Sdim
1366239310Sdim// Integer vector -> floating-point scalar bitconvert.
// Two steps: the vector elements are packed into one integer of the
// destination width, which is then reinterpreted as a float
// (BITCONVERT_*_I2F).
1367239310Sdim// v4i8 -> f32
1368239310Sdimdef : Pat<(f32 (bitconvert V4I8Regs:$s)),
1369239310Sdim          (BITCONVERT_32_I2F
1370239310Sdim          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
1371239310Sdim                    (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
1372239310Sdim// v4i16 -> f64
1373239310Sdimdef : Pat<(f64 (bitconvert V4I16Regs:$s)),
1374239310Sdim          (BITCONVERT_64_I2F
1375239310Sdim       (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
1376239310Sdim                (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
1377239310Sdim// v2i16 -> f32
1378239310Sdimdef : Pat<(f32 (bitconvert V2I16Regs:$s)),
1379239310Sdim          (BITCONVERT_32_I2F
1380239310Sdim    (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
1381239310Sdim// v2i32 -> f64
1382239310Sdimdef : Pat<(f64 (bitconvert V2I32Regs:$s)),
1383239310Sdim          (BITCONVERT_64_I2F
1384239310Sdim    (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
1385239310Sdim
1386239310Sdim// Integer scalar -> floating-point vector bitconvert.
1387239310Sdim// i64 -> v2f32
// The i64 is reinterpreted as f64, which is then converted to v2f32.
1388239310Sdimdef : Pat<(v2f32 (bitconvert Int64Regs:$s)),
1389239310Sdim          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
1390239310Sdim
1391239310Sdim// Floating-point vector -> integer scalar bitconvert.
1392239310Sdim// v2f32 -> i64
// The two f32 elements are packed into an f64, which is then reinterpreted
// as i64.
1393239310Sdimdef : Pat<(i64 (bitconvert V2F32Regs:$s)),
1394239310Sdim          (BITCONVERT_64_F2I
1395239310Sdim    (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
1396239310Sdim
1397239310Sdim// Integer vector -> floating-point vector bitconvert.
// Where element widths match, each element is reinterpreted individually and
// the result is assembled with Build_Vector*.  Where they differ, elements
// are first split or packed to the destination element width.
1398239310Sdim// v2i64 -> v4f32
// Each i64 element is split into a v2i32; each i32 half is reinterpreted as
// f32.
1399239310Sdimdef : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
1400239310Sdim          (Build_Vector4_f32
1401239310Sdim            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1402239310Sdim              (V2i64Extract V2I64Regs:$s, 0)), 0)),
1403239310Sdim            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1404239310Sdim              (V2i64Extract V2I64Regs:$s, 0)), 1)),
1405239310Sdim            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1406239310Sdim              (V2i64Extract V2I64Regs:$s, 1)), 0)),
1407239310Sdim            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
1408239310Sdim              (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
1409239310Sdim// v2i64 -> v2f64
1410239310Sdimdef : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
1411239310Sdim    (Build_Vector2_f64
1412239310Sdim            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
1413239310Sdim            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
1414239310Sdim// v2i32 -> v2f32
1415239310Sdimdef : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
1416239310Sdim    (Build_Vector2_f32
1417239310Sdim            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
1418239310Sdim            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
1419239310Sdim// v4i32 -> v2f64
// Elements (0,1) and (2,3) are each packed into an i64 and reinterpreted as
// f64.
1420239310Sdimdef : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
1421239310Sdim          (Build_Vector2_f64
1422239310Sdim           (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
1423239310Sdim             (V4i32Extract V4I32Regs:$s,1))),
1424239310Sdim           (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
1425239310Sdim             (V4i32Extract V4I32Regs:$s,3))))>;
1426239310Sdim// v4i32 -> v4f32
1427239310Sdimdef : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
1428239310Sdim    (Build_Vector4_f32
1429239310Sdim            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
1430239310Sdim            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
1431239310Sdim            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
1432239310Sdim            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
1433239310Sdim// v4i16 -> v2f32
// The four i16 elements are packed into an i64, reinterpreted as f64, then
// converted to v2f32.
1434239310Sdimdef : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
1435239310Sdim          (VecF64toV2F32 (BITCONVERT_64_I2F
1436239310Sdim          (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
1437239310Sdim            (V4i16Extract V4I16Regs:$s,1),
1438239310Sdim                      (V4i16Extract V4I16Regs:$s,2),
1439239310Sdim                      (V4i16Extract V4I16Regs:$s,3))))>;
1440239310Sdim
1441239310Sdim// Floating-point vector -> integer vector bitconvert.
// Where element widths match, each element is reinterpreted individually and
// the result is assembled with Build_Vector*.  Where they differ, elements
// are first split or packed to the destination element width.
1442239310Sdim// v4f32 -> v2i64
// Elements (0,1) and (2,3) are each packed into an f64 and reinterpreted as
// i64.
1443239310Sdimdef : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
1444239310Sdim          (Build_Vector2_i64
1445239310Sdim           (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
1446239310Sdim             (V4f32Extract V4F32Regs:$s,1))),
1447239310Sdim           (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
1448239310Sdim             (V4f32Extract V4F32Regs:$s,3))))>;
1449239310Sdim// v2f64 -> v2i64
1450239310Sdimdef : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
1451239310Sdim    (Build_Vector2_i64
1452239310Sdim            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
1453239310Sdim            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
1454239310Sdim// v2f32 -> v2i32
1455239310Sdimdef : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
1456239310Sdim    (Build_Vector2_i32
1457239310Sdim            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
1458239310Sdim            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
1459239310Sdim// v2f64 -> v4i32
// Each f64 element is split into a v2f32; each f32 half is reinterpreted as
// i32.
1460239310Sdimdef : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
1461239310Sdim          (Build_Vector4_i32
1462239310Sdim            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1463239310Sdim              (V2f64Extract V2F64Regs:$s, 0)), 0)),
1464239310Sdim            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1465239310Sdim              (V2f64Extract V2F64Regs:$s, 0)), 1)),
1466239310Sdim            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1467239310Sdim              (V2f64Extract V2F64Regs:$s, 1)), 0)),
1468239310Sdim            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
1469239310Sdim              (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
1470239310Sdim// v4f32 -> v4i32
1471239310Sdimdef : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
1472239310Sdim          (Build_Vector4_i32
1473239310Sdim            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
1474239310Sdim            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
1475239310Sdim            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
1476239310Sdim            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
1477239310Sdim// v2f32 -> v4i16
// The two f32 elements are packed into an f64, reinterpreted as i64, then
// converted to v4i16.
1478239310Sdimdef : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
1479239310Sdim          (VecI64toV4I16 (BITCONVERT_64_F2I
1480239310Sdim          (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
1481239310Sdim            (V2f32Extract V2F32Regs:$s,1))))>;
1482