NVPTXISelDAGToDAG.cpp revision 360784
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the NVPTX target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTXISelDAGToDAG.h"
14#include "MCTargetDesc/NVPTXBaseInfo.h"
15#include "NVPTXUtilities.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/IntrinsicsNVPTX.h"
20#include "llvm/Support/AtomicOrdering.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/Target/TargetIntrinsicInfo.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "nvptx-isel"
30
31/// createNVPTXISelDag - This pass converts a legalized DAG into a
32/// NVPTX-specific DAG, ready for instruction scheduling.
33FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
34                                       llvm::CodeGenOpt::Level OptLevel) {
35  return new NVPTXDAGToDAGISel(TM, OptLevel);
36}
37
38NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
39                                     CodeGenOpt::Level OptLevel)
40    : SelectionDAGISel(tm, OptLevel), TM(tm) {
41  doMulWide = (OptLevel > 0);
42}
43
44bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
45  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
46  return SelectionDAGISel::runOnMachineFunction(MF);
47}
48
49int NVPTXDAGToDAGISel::getDivF32Level() const {
50  return Subtarget->getTargetLowering()->getDivF32Level();
51}
52
53bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
54  return Subtarget->getTargetLowering()->usePrecSqrtF32();
55}
56
57bool NVPTXDAGToDAGISel::useF32FTZ() const {
58  return Subtarget->getTargetLowering()->useF32FTZ(*MF);
59}
60
61bool NVPTXDAGToDAGISel::allowFMA() const {
62  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
63  return TL->allowFMA(*MF, OptLevel);
64}
65
66bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
67  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
68  return TL->allowUnsafeFPMath(*MF);
69}
70
71bool NVPTXDAGToDAGISel::useShortPointers() const {
72  return TM.useShortPointers();
73}
74
75/// Select - Select instructions not customized! Used for
76/// expanded, promoted and normal instructions.
77void NVPTXDAGToDAGISel::Select(SDNode *N) {
78
79  if (N->isMachineOpcode()) {
80    N->setNodeId(-1);
81    return; // Already selected.
82  }
83
84  switch (N->getOpcode()) {
85  case ISD::LOAD:
86  case ISD::ATOMIC_LOAD:
87    if (tryLoad(N))
88      return;
89    break;
90  case ISD::STORE:
91  case ISD::ATOMIC_STORE:
92    if (tryStore(N))
93      return;
94    break;
95  case ISD::EXTRACT_VECTOR_ELT:
96    if (tryEXTRACT_VECTOR_ELEMENT(N))
97      return;
98    break;
99  case NVPTXISD::SETP_F16X2:
100    SelectSETP_F16X2(N);
101    return;
102
103  case NVPTXISD::LoadV2:
104  case NVPTXISD::LoadV4:
105    if (tryLoadVector(N))
106      return;
107    break;
108  case NVPTXISD::LDGV2:
109  case NVPTXISD::LDGV4:
110  case NVPTXISD::LDUV2:
111  case NVPTXISD::LDUV4:
112    if (tryLDGLDU(N))
113      return;
114    break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117    if (tryStoreVector(N))
118      return;
119    break;
120  case NVPTXISD::LoadParam:
121  case NVPTXISD::LoadParamV2:
122  case NVPTXISD::LoadParamV4:
123    if (tryLoadParam(N))
124      return;
125    break;
126  case NVPTXISD::StoreRetval:
127  case NVPTXISD::StoreRetvalV2:
128  case NVPTXISD::StoreRetvalV4:
129    if (tryStoreRetval(N))
130      return;
131    break;
132  case NVPTXISD::StoreParam:
133  case NVPTXISD::StoreParamV2:
134  case NVPTXISD::StoreParamV4:
135  case NVPTXISD::StoreParamS32:
136  case NVPTXISD::StoreParamU32:
137    if (tryStoreParam(N))
138      return;
139    break;
140  case ISD::INTRINSIC_WO_CHAIN:
141    if (tryIntrinsicNoChain(N))
142      return;
143    break;
144  case ISD::INTRINSIC_W_CHAIN:
145    if (tryIntrinsicChain(N))
146      return;
147    break;
148  case NVPTXISD::Tex1DFloatS32:
149  case NVPTXISD::Tex1DFloatFloat:
150  case NVPTXISD::Tex1DFloatFloatLevel:
151  case NVPTXISD::Tex1DFloatFloatGrad:
152  case NVPTXISD::Tex1DS32S32:
153  case NVPTXISD::Tex1DS32Float:
154  case NVPTXISD::Tex1DS32FloatLevel:
155  case NVPTXISD::Tex1DS32FloatGrad:
156  case NVPTXISD::Tex1DU32S32:
157  case NVPTXISD::Tex1DU32Float:
158  case NVPTXISD::Tex1DU32FloatLevel:
159  case NVPTXISD::Tex1DU32FloatGrad:
160  case NVPTXISD::Tex1DArrayFloatS32:
161  case NVPTXISD::Tex1DArrayFloatFloat:
162  case NVPTXISD::Tex1DArrayFloatFloatLevel:
163  case NVPTXISD::Tex1DArrayFloatFloatGrad:
164  case NVPTXISD::Tex1DArrayS32S32:
165  case NVPTXISD::Tex1DArrayS32Float:
166  case NVPTXISD::Tex1DArrayS32FloatLevel:
167  case NVPTXISD::Tex1DArrayS32FloatGrad:
168  case NVPTXISD::Tex1DArrayU32S32:
169  case NVPTXISD::Tex1DArrayU32Float:
170  case NVPTXISD::Tex1DArrayU32FloatLevel:
171  case NVPTXISD::Tex1DArrayU32FloatGrad:
172  case NVPTXISD::Tex2DFloatS32:
173  case NVPTXISD::Tex2DFloatFloat:
174  case NVPTXISD::Tex2DFloatFloatLevel:
175  case NVPTXISD::Tex2DFloatFloatGrad:
176  case NVPTXISD::Tex2DS32S32:
177  case NVPTXISD::Tex2DS32Float:
178  case NVPTXISD::Tex2DS32FloatLevel:
179  case NVPTXISD::Tex2DS32FloatGrad:
180  case NVPTXISD::Tex2DU32S32:
181  case NVPTXISD::Tex2DU32Float:
182  case NVPTXISD::Tex2DU32FloatLevel:
183  case NVPTXISD::Tex2DU32FloatGrad:
184  case NVPTXISD::Tex2DArrayFloatS32:
185  case NVPTXISD::Tex2DArrayFloatFloat:
186  case NVPTXISD::Tex2DArrayFloatFloatLevel:
187  case NVPTXISD::Tex2DArrayFloatFloatGrad:
188  case NVPTXISD::Tex2DArrayS32S32:
189  case NVPTXISD::Tex2DArrayS32Float:
190  case NVPTXISD::Tex2DArrayS32FloatLevel:
191  case NVPTXISD::Tex2DArrayS32FloatGrad:
192  case NVPTXISD::Tex2DArrayU32S32:
193  case NVPTXISD::Tex2DArrayU32Float:
194  case NVPTXISD::Tex2DArrayU32FloatLevel:
195  case NVPTXISD::Tex2DArrayU32FloatGrad:
196  case NVPTXISD::Tex3DFloatS32:
197  case NVPTXISD::Tex3DFloatFloat:
198  case NVPTXISD::Tex3DFloatFloatLevel:
199  case NVPTXISD::Tex3DFloatFloatGrad:
200  case NVPTXISD::Tex3DS32S32:
201  case NVPTXISD::Tex3DS32Float:
202  case NVPTXISD::Tex3DS32FloatLevel:
203  case NVPTXISD::Tex3DS32FloatGrad:
204  case NVPTXISD::Tex3DU32S32:
205  case NVPTXISD::Tex3DU32Float:
206  case NVPTXISD::Tex3DU32FloatLevel:
207  case NVPTXISD::Tex3DU32FloatGrad:
208  case NVPTXISD::TexCubeFloatFloat:
209  case NVPTXISD::TexCubeFloatFloatLevel:
210  case NVPTXISD::TexCubeS32Float:
211  case NVPTXISD::TexCubeS32FloatLevel:
212  case NVPTXISD::TexCubeU32Float:
213  case NVPTXISD::TexCubeU32FloatLevel:
214  case NVPTXISD::TexCubeArrayFloatFloat:
215  case NVPTXISD::TexCubeArrayFloatFloatLevel:
216  case NVPTXISD::TexCubeArrayS32Float:
217  case NVPTXISD::TexCubeArrayS32FloatLevel:
218  case NVPTXISD::TexCubeArrayU32Float:
219  case NVPTXISD::TexCubeArrayU32FloatLevel:
220  case NVPTXISD::Tld4R2DFloatFloat:
221  case NVPTXISD::Tld4G2DFloatFloat:
222  case NVPTXISD::Tld4B2DFloatFloat:
223  case NVPTXISD::Tld4A2DFloatFloat:
224  case NVPTXISD::Tld4R2DS64Float:
225  case NVPTXISD::Tld4G2DS64Float:
226  case NVPTXISD::Tld4B2DS64Float:
227  case NVPTXISD::Tld4A2DS64Float:
228  case NVPTXISD::Tld4R2DU64Float:
229  case NVPTXISD::Tld4G2DU64Float:
230  case NVPTXISD::Tld4B2DU64Float:
231  case NVPTXISD::Tld4A2DU64Float:
232  case NVPTXISD::TexUnified1DFloatS32:
233  case NVPTXISD::TexUnified1DFloatFloat:
234  case NVPTXISD::TexUnified1DFloatFloatLevel:
235  case NVPTXISD::TexUnified1DFloatFloatGrad:
236  case NVPTXISD::TexUnified1DS32S32:
237  case NVPTXISD::TexUnified1DS32Float:
238  case NVPTXISD::TexUnified1DS32FloatLevel:
239  case NVPTXISD::TexUnified1DS32FloatGrad:
240  case NVPTXISD::TexUnified1DU32S32:
241  case NVPTXISD::TexUnified1DU32Float:
242  case NVPTXISD::TexUnified1DU32FloatLevel:
243  case NVPTXISD::TexUnified1DU32FloatGrad:
244  case NVPTXISD::TexUnified1DArrayFloatS32:
245  case NVPTXISD::TexUnified1DArrayFloatFloat:
246  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
247  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
248  case NVPTXISD::TexUnified1DArrayS32S32:
249  case NVPTXISD::TexUnified1DArrayS32Float:
250  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
251  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
252  case NVPTXISD::TexUnified1DArrayU32S32:
253  case NVPTXISD::TexUnified1DArrayU32Float:
254  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
255  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
256  case NVPTXISD::TexUnified2DFloatS32:
257  case NVPTXISD::TexUnified2DFloatFloat:
258  case NVPTXISD::TexUnified2DFloatFloatLevel:
259  case NVPTXISD::TexUnified2DFloatFloatGrad:
260  case NVPTXISD::TexUnified2DS32S32:
261  case NVPTXISD::TexUnified2DS32Float:
262  case NVPTXISD::TexUnified2DS32FloatLevel:
263  case NVPTXISD::TexUnified2DS32FloatGrad:
264  case NVPTXISD::TexUnified2DU32S32:
265  case NVPTXISD::TexUnified2DU32Float:
266  case NVPTXISD::TexUnified2DU32FloatLevel:
267  case NVPTXISD::TexUnified2DU32FloatGrad:
268  case NVPTXISD::TexUnified2DArrayFloatS32:
269  case NVPTXISD::TexUnified2DArrayFloatFloat:
270  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
271  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
272  case NVPTXISD::TexUnified2DArrayS32S32:
273  case NVPTXISD::TexUnified2DArrayS32Float:
274  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
275  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
276  case NVPTXISD::TexUnified2DArrayU32S32:
277  case NVPTXISD::TexUnified2DArrayU32Float:
278  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
279  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
280  case NVPTXISD::TexUnified3DFloatS32:
281  case NVPTXISD::TexUnified3DFloatFloat:
282  case NVPTXISD::TexUnified3DFloatFloatLevel:
283  case NVPTXISD::TexUnified3DFloatFloatGrad:
284  case NVPTXISD::TexUnified3DS32S32:
285  case NVPTXISD::TexUnified3DS32Float:
286  case NVPTXISD::TexUnified3DS32FloatLevel:
287  case NVPTXISD::TexUnified3DS32FloatGrad:
288  case NVPTXISD::TexUnified3DU32S32:
289  case NVPTXISD::TexUnified3DU32Float:
290  case NVPTXISD::TexUnified3DU32FloatLevel:
291  case NVPTXISD::TexUnified3DU32FloatGrad:
292  case NVPTXISD::TexUnifiedCubeFloatFloat:
293  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
294  case NVPTXISD::TexUnifiedCubeS32Float:
295  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
296  case NVPTXISD::TexUnifiedCubeU32Float:
297  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
298  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
299  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
300  case NVPTXISD::TexUnifiedCubeArrayS32Float:
301  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
302  case NVPTXISD::TexUnifiedCubeArrayU32Float:
303  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
304  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
305  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
306  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
307  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
308  case NVPTXISD::Tld4UnifiedR2DS64Float:
309  case NVPTXISD::Tld4UnifiedG2DS64Float:
310  case NVPTXISD::Tld4UnifiedB2DS64Float:
311  case NVPTXISD::Tld4UnifiedA2DS64Float:
312  case NVPTXISD::Tld4UnifiedR2DU64Float:
313  case NVPTXISD::Tld4UnifiedG2DU64Float:
314  case NVPTXISD::Tld4UnifiedB2DU64Float:
315  case NVPTXISD::Tld4UnifiedA2DU64Float:
316    if (tryTextureIntrinsic(N))
317      return;
318    break;
319  case NVPTXISD::Suld1DI8Clamp:
320  case NVPTXISD::Suld1DI16Clamp:
321  case NVPTXISD::Suld1DI32Clamp:
322  case NVPTXISD::Suld1DI64Clamp:
323  case NVPTXISD::Suld1DV2I8Clamp:
324  case NVPTXISD::Suld1DV2I16Clamp:
325  case NVPTXISD::Suld1DV2I32Clamp:
326  case NVPTXISD::Suld1DV2I64Clamp:
327  case NVPTXISD::Suld1DV4I8Clamp:
328  case NVPTXISD::Suld1DV4I16Clamp:
329  case NVPTXISD::Suld1DV4I32Clamp:
330  case NVPTXISD::Suld1DArrayI8Clamp:
331  case NVPTXISD::Suld1DArrayI16Clamp:
332  case NVPTXISD::Suld1DArrayI32Clamp:
333  case NVPTXISD::Suld1DArrayI64Clamp:
334  case NVPTXISD::Suld1DArrayV2I8Clamp:
335  case NVPTXISD::Suld1DArrayV2I16Clamp:
336  case NVPTXISD::Suld1DArrayV2I32Clamp:
337  case NVPTXISD::Suld1DArrayV2I64Clamp:
338  case NVPTXISD::Suld1DArrayV4I8Clamp:
339  case NVPTXISD::Suld1DArrayV4I16Clamp:
340  case NVPTXISD::Suld1DArrayV4I32Clamp:
341  case NVPTXISD::Suld2DI8Clamp:
342  case NVPTXISD::Suld2DI16Clamp:
343  case NVPTXISD::Suld2DI32Clamp:
344  case NVPTXISD::Suld2DI64Clamp:
345  case NVPTXISD::Suld2DV2I8Clamp:
346  case NVPTXISD::Suld2DV2I16Clamp:
347  case NVPTXISD::Suld2DV2I32Clamp:
348  case NVPTXISD::Suld2DV2I64Clamp:
349  case NVPTXISD::Suld2DV4I8Clamp:
350  case NVPTXISD::Suld2DV4I16Clamp:
351  case NVPTXISD::Suld2DV4I32Clamp:
352  case NVPTXISD::Suld2DArrayI8Clamp:
353  case NVPTXISD::Suld2DArrayI16Clamp:
354  case NVPTXISD::Suld2DArrayI32Clamp:
355  case NVPTXISD::Suld2DArrayI64Clamp:
356  case NVPTXISD::Suld2DArrayV2I8Clamp:
357  case NVPTXISD::Suld2DArrayV2I16Clamp:
358  case NVPTXISD::Suld2DArrayV2I32Clamp:
359  case NVPTXISD::Suld2DArrayV2I64Clamp:
360  case NVPTXISD::Suld2DArrayV4I8Clamp:
361  case NVPTXISD::Suld2DArrayV4I16Clamp:
362  case NVPTXISD::Suld2DArrayV4I32Clamp:
363  case NVPTXISD::Suld3DI8Clamp:
364  case NVPTXISD::Suld3DI16Clamp:
365  case NVPTXISD::Suld3DI32Clamp:
366  case NVPTXISD::Suld3DI64Clamp:
367  case NVPTXISD::Suld3DV2I8Clamp:
368  case NVPTXISD::Suld3DV2I16Clamp:
369  case NVPTXISD::Suld3DV2I32Clamp:
370  case NVPTXISD::Suld3DV2I64Clamp:
371  case NVPTXISD::Suld3DV4I8Clamp:
372  case NVPTXISD::Suld3DV4I16Clamp:
373  case NVPTXISD::Suld3DV4I32Clamp:
374  case NVPTXISD::Suld1DI8Trap:
375  case NVPTXISD::Suld1DI16Trap:
376  case NVPTXISD::Suld1DI32Trap:
377  case NVPTXISD::Suld1DI64Trap:
378  case NVPTXISD::Suld1DV2I8Trap:
379  case NVPTXISD::Suld1DV2I16Trap:
380  case NVPTXISD::Suld1DV2I32Trap:
381  case NVPTXISD::Suld1DV2I64Trap:
382  case NVPTXISD::Suld1DV4I8Trap:
383  case NVPTXISD::Suld1DV4I16Trap:
384  case NVPTXISD::Suld1DV4I32Trap:
385  case NVPTXISD::Suld1DArrayI8Trap:
386  case NVPTXISD::Suld1DArrayI16Trap:
387  case NVPTXISD::Suld1DArrayI32Trap:
388  case NVPTXISD::Suld1DArrayI64Trap:
389  case NVPTXISD::Suld1DArrayV2I8Trap:
390  case NVPTXISD::Suld1DArrayV2I16Trap:
391  case NVPTXISD::Suld1DArrayV2I32Trap:
392  case NVPTXISD::Suld1DArrayV2I64Trap:
393  case NVPTXISD::Suld1DArrayV4I8Trap:
394  case NVPTXISD::Suld1DArrayV4I16Trap:
395  case NVPTXISD::Suld1DArrayV4I32Trap:
396  case NVPTXISD::Suld2DI8Trap:
397  case NVPTXISD::Suld2DI16Trap:
398  case NVPTXISD::Suld2DI32Trap:
399  case NVPTXISD::Suld2DI64Trap:
400  case NVPTXISD::Suld2DV2I8Trap:
401  case NVPTXISD::Suld2DV2I16Trap:
402  case NVPTXISD::Suld2DV2I32Trap:
403  case NVPTXISD::Suld2DV2I64Trap:
404  case NVPTXISD::Suld2DV4I8Trap:
405  case NVPTXISD::Suld2DV4I16Trap:
406  case NVPTXISD::Suld2DV4I32Trap:
407  case NVPTXISD::Suld2DArrayI8Trap:
408  case NVPTXISD::Suld2DArrayI16Trap:
409  case NVPTXISD::Suld2DArrayI32Trap:
410  case NVPTXISD::Suld2DArrayI64Trap:
411  case NVPTXISD::Suld2DArrayV2I8Trap:
412  case NVPTXISD::Suld2DArrayV2I16Trap:
413  case NVPTXISD::Suld2DArrayV2I32Trap:
414  case NVPTXISD::Suld2DArrayV2I64Trap:
415  case NVPTXISD::Suld2DArrayV4I8Trap:
416  case NVPTXISD::Suld2DArrayV4I16Trap:
417  case NVPTXISD::Suld2DArrayV4I32Trap:
418  case NVPTXISD::Suld3DI8Trap:
419  case NVPTXISD::Suld3DI16Trap:
420  case NVPTXISD::Suld3DI32Trap:
421  case NVPTXISD::Suld3DI64Trap:
422  case NVPTXISD::Suld3DV2I8Trap:
423  case NVPTXISD::Suld3DV2I16Trap:
424  case NVPTXISD::Suld3DV2I32Trap:
425  case NVPTXISD::Suld3DV2I64Trap:
426  case NVPTXISD::Suld3DV4I8Trap:
427  case NVPTXISD::Suld3DV4I16Trap:
428  case NVPTXISD::Suld3DV4I32Trap:
429  case NVPTXISD::Suld1DI8Zero:
430  case NVPTXISD::Suld1DI16Zero:
431  case NVPTXISD::Suld1DI32Zero:
432  case NVPTXISD::Suld1DI64Zero:
433  case NVPTXISD::Suld1DV2I8Zero:
434  case NVPTXISD::Suld1DV2I16Zero:
435  case NVPTXISD::Suld1DV2I32Zero:
436  case NVPTXISD::Suld1DV2I64Zero:
437  case NVPTXISD::Suld1DV4I8Zero:
438  case NVPTXISD::Suld1DV4I16Zero:
439  case NVPTXISD::Suld1DV4I32Zero:
440  case NVPTXISD::Suld1DArrayI8Zero:
441  case NVPTXISD::Suld1DArrayI16Zero:
442  case NVPTXISD::Suld1DArrayI32Zero:
443  case NVPTXISD::Suld1DArrayI64Zero:
444  case NVPTXISD::Suld1DArrayV2I8Zero:
445  case NVPTXISD::Suld1DArrayV2I16Zero:
446  case NVPTXISD::Suld1DArrayV2I32Zero:
447  case NVPTXISD::Suld1DArrayV2I64Zero:
448  case NVPTXISD::Suld1DArrayV4I8Zero:
449  case NVPTXISD::Suld1DArrayV4I16Zero:
450  case NVPTXISD::Suld1DArrayV4I32Zero:
451  case NVPTXISD::Suld2DI8Zero:
452  case NVPTXISD::Suld2DI16Zero:
453  case NVPTXISD::Suld2DI32Zero:
454  case NVPTXISD::Suld2DI64Zero:
455  case NVPTXISD::Suld2DV2I8Zero:
456  case NVPTXISD::Suld2DV2I16Zero:
457  case NVPTXISD::Suld2DV2I32Zero:
458  case NVPTXISD::Suld2DV2I64Zero:
459  case NVPTXISD::Suld2DV4I8Zero:
460  case NVPTXISD::Suld2DV4I16Zero:
461  case NVPTXISD::Suld2DV4I32Zero:
462  case NVPTXISD::Suld2DArrayI8Zero:
463  case NVPTXISD::Suld2DArrayI16Zero:
464  case NVPTXISD::Suld2DArrayI32Zero:
465  case NVPTXISD::Suld2DArrayI64Zero:
466  case NVPTXISD::Suld2DArrayV2I8Zero:
467  case NVPTXISD::Suld2DArrayV2I16Zero:
468  case NVPTXISD::Suld2DArrayV2I32Zero:
469  case NVPTXISD::Suld2DArrayV2I64Zero:
470  case NVPTXISD::Suld2DArrayV4I8Zero:
471  case NVPTXISD::Suld2DArrayV4I16Zero:
472  case NVPTXISD::Suld2DArrayV4I32Zero:
473  case NVPTXISD::Suld3DI8Zero:
474  case NVPTXISD::Suld3DI16Zero:
475  case NVPTXISD::Suld3DI32Zero:
476  case NVPTXISD::Suld3DI64Zero:
477  case NVPTXISD::Suld3DV2I8Zero:
478  case NVPTXISD::Suld3DV2I16Zero:
479  case NVPTXISD::Suld3DV2I32Zero:
480  case NVPTXISD::Suld3DV2I64Zero:
481  case NVPTXISD::Suld3DV4I8Zero:
482  case NVPTXISD::Suld3DV4I16Zero:
483  case NVPTXISD::Suld3DV4I32Zero:
484    if (trySurfaceIntrinsic(N))
485      return;
486    break;
487  case ISD::AND:
488  case ISD::SRA:
489  case ISD::SRL:
490    // Try to select BFE
491    if (tryBFE(N))
492      return;
493    break;
494  case ISD::ADDRSPACECAST:
495    SelectAddrSpaceCast(N);
496    return;
497  case ISD::ConstantFP:
498    if (tryConstantFP16(N))
499      return;
500    break;
501  default:
502    break;
503  }
504  SelectCode(N);
505}
506
507bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
508  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
509  switch (IID) {
510  default:
511    return false;
512  case Intrinsic::nvvm_ldg_global_f:
513  case Intrinsic::nvvm_ldg_global_i:
514  case Intrinsic::nvvm_ldg_global_p:
515  case Intrinsic::nvvm_ldu_global_f:
516  case Intrinsic::nvvm_ldu_global_i:
517  case Intrinsic::nvvm_ldu_global_p:
518    return tryLDGLDU(N);
519  }
520}
521
522// There's no way to specify FP16 immediates in .f16 ops, so we have to
523// load them into an .f16 register first.
524bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
525  if (N->getValueType(0) != MVT::f16)
526    return false;
527  SDValue Val = CurDAG->getTargetConstantFP(
528      cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
529  SDNode *LoadConstF16 =
530      CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
531  ReplaceNode(N, LoadConstF16);
532  return true;
533}
534
535// Map ISD:CONDCODE value to appropriate CmpMode expected by
536// NVPTXInstPrinter::printCmpMode()
537static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
538  using NVPTX::PTXCmpMode::CmpMode;
539  unsigned PTXCmpMode = [](ISD::CondCode CC) {
540    switch (CC) {
541    default:
542      llvm_unreachable("Unexpected condition code.");
543    case ISD::SETOEQ:
544      return CmpMode::EQ;
545    case ISD::SETOGT:
546      return CmpMode::GT;
547    case ISD::SETOGE:
548      return CmpMode::GE;
549    case ISD::SETOLT:
550      return CmpMode::LT;
551    case ISD::SETOLE:
552      return CmpMode::LE;
553    case ISD::SETONE:
554      return CmpMode::NE;
555    case ISD::SETO:
556      return CmpMode::NUM;
557    case ISD::SETUO:
558      return CmpMode::NotANumber;
559    case ISD::SETUEQ:
560      return CmpMode::EQU;
561    case ISD::SETUGT:
562      return CmpMode::GTU;
563    case ISD::SETUGE:
564      return CmpMode::GEU;
565    case ISD::SETULT:
566      return CmpMode::LTU;
567    case ISD::SETULE:
568      return CmpMode::LEU;
569    case ISD::SETUNE:
570      return CmpMode::NEU;
571    case ISD::SETEQ:
572      return CmpMode::EQ;
573    case ISD::SETGT:
574      return CmpMode::GT;
575    case ISD::SETGE:
576      return CmpMode::GE;
577    case ISD::SETLT:
578      return CmpMode::LT;
579    case ISD::SETLE:
580      return CmpMode::LE;
581    case ISD::SETNE:
582      return CmpMode::NE;
583    }
584  }(CondCode.get());
585
586  if (FTZ)
587    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
588
589  return PTXCmpMode;
590}
591
592bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
593  unsigned PTXCmpMode =
594      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
595  SDLoc DL(N);
596  SDNode *SetP = CurDAG->getMachineNode(
597      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
598      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
599  ReplaceNode(N, SetP);
600  return true;
601}
602
603// Find all instances of extract_vector_elt that use this v2f16 vector
604// and coalesce them into a scattering move instruction.
605bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
606  SDValue Vector = N->getOperand(0);
607
608  // We only care about f16x2 as it's the only real vector type we
609  // need to deal with.
610  if (Vector.getSimpleValueType() != MVT::v2f16)
611    return false;
612
613  // Find and record all uses of this vector that extract element 0 or 1.
614  SmallVector<SDNode *, 4> E0, E1;
615  for (auto U : Vector.getNode()->uses()) {
616    if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
617      continue;
618    if (U->getOperand(0) != Vector)
619      continue;
620    if (const ConstantSDNode *IdxConst =
621            dyn_cast<ConstantSDNode>(U->getOperand(1))) {
622      if (IdxConst->getZExtValue() == 0)
623        E0.push_back(U);
624      else if (IdxConst->getZExtValue() == 1)
625        E1.push_back(U);
626      else
627        llvm_unreachable("Invalid vector index.");
628    }
629  }
630
631  // There's no point scattering f16x2 if we only ever access one
632  // element of it.
633  if (E0.empty() || E1.empty())
634    return false;
635
636  unsigned Op = NVPTX::SplitF16x2;
637  // If the vector has been BITCAST'ed from i32, we can use original
638  // value directly and avoid register-to-register move.
639  SDValue Source = Vector;
640  if (Vector->getOpcode() == ISD::BITCAST) {
641    Op = NVPTX::SplitI32toF16x2;
642    Source = Vector->getOperand(0);
643  }
644  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
645  // into f16,f16 SplitF16x2(V)
646  SDNode *ScatterOp =
647      CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
648  for (auto *Node : E0)
649    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
650  for (auto *Node : E1)
651    ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
652
653  return true;
654}
655
656static unsigned int getCodeAddrSpace(MemSDNode *N) {
657  const Value *Src = N->getMemOperand()->getValue();
658
659  if (!Src)
660    return NVPTX::PTXLdStInstCode::GENERIC;
661
662  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
663    switch (PT->getAddressSpace()) {
664    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
665    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
666    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
667    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
668    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
669    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
670    default: break;
671    }
672  }
673  return NVPTX::PTXLdStInstCode::GENERIC;
674}
675
676static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
677                          unsigned CodeAddrSpace, MachineFunction *F) {
678  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
679  // space.
680  //
681  // We have two ways of identifying invariant loads: Loads may be explicitly
682  // marked as invariant, or we may infer them to be invariant.
683  //
684  // We currently infer invariance for loads from
685  //  - constant global variables, and
686  //  - kernel function pointer params that are noalias (i.e. __restrict) and
687  //    never written to.
688  //
689  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
690  // not during the SelectionDAG phase).
691  //
692  // TODO: Infer invariance only at -O2.  We still want to use ldg at -O0 for
693  // explicitly invariant loads because these are how clang tells us to use ldg
694  // when the user uses a builtin.
695  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
696    return false;
697
698  if (N->isInvariant())
699    return true;
700
701  bool IsKernelFn = isKernelFunction(F->getFunction());
702
703  // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
704  // because the former looks through phi nodes while the latter does not. We
705  // need to look through phi nodes to handle pointer induction variables.
706  SmallVector<const Value *, 8> Objs;
707  GetUnderlyingObjects(N->getMemOperand()->getValue(),
708                       Objs, F->getDataLayout());
709
710  return all_of(Objs, [&](const Value *V) {
711    if (auto *A = dyn_cast<const Argument>(V))
712      return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
713    if (auto *GV = dyn_cast<const GlobalVariable>(V))
714      return GV->isConstant();
715    return false;
716  });
717}
718
719bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
720  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
721  switch (IID) {
722  default:
723    return false;
724  case Intrinsic::nvvm_texsurf_handle_internal:
725    SelectTexSurfHandle(N);
726    return true;
727  }
728}
729
730void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
731  // Op 0 is the intrinsic ID
732  SDValue Wrapper = N->getOperand(1);
733  SDValue GlobalVal = Wrapper.getOperand(0);
734  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
735                                        MVT::i64, GlobalVal));
736}
737
738void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
739  SDValue Src = N->getOperand(0);
740  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
741  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
742  unsigned DstAddrSpace = CastN->getDestAddressSpace();
743  assert(SrcAddrSpace != DstAddrSpace &&
744         "addrspacecast must be between different address spaces");
745
746  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
747    // Specific to generic
748    unsigned Opc;
749    switch (SrcAddrSpace) {
750    default: report_fatal_error("Bad address space in addrspacecast");
751    case ADDRESS_SPACE_GLOBAL:
752      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
753      break;
754    case ADDRESS_SPACE_SHARED:
755      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
756                                               : NVPTX::cvta_shared_yes_64)
757                         : NVPTX::cvta_shared_yes;
758      break;
759    case ADDRESS_SPACE_CONST:
760      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
761                                               : NVPTX::cvta_const_yes_64)
762                         : NVPTX::cvta_const_yes;
763      break;
764    case ADDRESS_SPACE_LOCAL:
765      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
766                                               : NVPTX::cvta_local_yes_64)
767                         : NVPTX::cvta_local_yes;
768      break;
769    }
770    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
771                                          Src));
772    return;
773  } else {
774    // Generic to specific
775    if (SrcAddrSpace != 0)
776      report_fatal_error("Cannot cast between two non-generic address spaces");
777    unsigned Opc;
778    switch (DstAddrSpace) {
779    default: report_fatal_error("Bad address space in addrspacecast");
780    case ADDRESS_SPACE_GLOBAL:
781      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
782                         : NVPTX::cvta_to_global_yes;
783      break;
784    case ADDRESS_SPACE_SHARED:
785      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
786                                                : NVPTX::cvta_to_shared_yes_64)
787                         : NVPTX::cvta_to_shared_yes;
788      break;
789    case ADDRESS_SPACE_CONST:
790      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
791                                             : NVPTX::cvta_to_const_yes_64)
792                         : NVPTX::cvta_to_const_yes;
793      break;
794    case ADDRESS_SPACE_LOCAL:
795      Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
796                                               : NVPTX::cvta_to_local_yes_64)
797                         : NVPTX::cvta_to_local_yes;
798      break;
799    case ADDRESS_SPACE_PARAM:
800      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
801                         : NVPTX::nvvm_ptr_gen_to_param;
802      break;
803    }
804    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
805                                          Src));
806    return;
807  }
808}
809
810// Helper function template to reduce amount of boilerplate code for
811// opcode selection.
812static Optional<unsigned> pickOpcodeForVT(
813    MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
814    unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
815    unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
816  switch (VT) {
817  case MVT::i1:
818  case MVT::i8:
819    return Opcode_i8;
820  case MVT::i16:
821    return Opcode_i16;
822  case MVT::i32:
823    return Opcode_i32;
824  case MVT::i64:
825    return Opcode_i64;
826  case MVT::f16:
827    return Opcode_f16;
828  case MVT::v2f16:
829    return Opcode_f16x2;
830  case MVT::f32:
831    return Opcode_f32;
832  case MVT::f64:
833    return Opcode_f64;
834  default:
835    return None;
836  }
837}
838
839bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
840  SDLoc dl(N);
841  MemSDNode *LD = cast<MemSDNode>(N);
842  assert(LD->readMem() && "Expected load");
843  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
844  EVT LoadedVT = LD->getMemoryVT();
845  SDNode *NVPTXLD = nullptr;
846
847  // do not support pre/post inc/dec
848  if (PlainLoad && PlainLoad->isIndexed())
849    return false;
850
851  if (!LoadedVT.isSimple())
852    return false;
853
854  AtomicOrdering Ordering = LD->getOrdering();
855  // In order to lower atomic loads with stronger guarantees we would need to
856  // use load.acquire or insert fences. However these features were only added
857  // with PTX ISA 6.0 / sm_70.
858  // TODO: Check if we can actually use the new instructions and implement them.
859  if (isStrongerThanMonotonic(Ordering))
860    return false;
861
862  // Address Space Setting
863  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
864  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
865    return tryLDGLDU(N);
866  }
867
868  unsigned int PointerSize =
869      CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
870
871  // Volatile Setting
872  // - .volatile is only available for .global and .shared
873  // - .volatile has the same memory synchronization semantics as .relaxed.sys
874  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
875  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
876      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
877      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
878    isVolatile = false;
879
880  // Type Setting: fromType + fromTypeWidth
881  //
882  // Sign   : ISD::SEXTLOAD
883  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
884  //          type is integer
885  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
886  MVT SimpleVT = LoadedVT.getSimpleVT();
887  MVT ScalarVT = SimpleVT.getScalarType();
888  // Read at least 8 bits (predicates are stored as 8-bit values)
889  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
890  unsigned int fromType;
891
892  // Vector Setting
893  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
894  if (SimpleVT.isVector()) {
895    assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
896    // v2f16 is loaded using ld.b32
897    fromTypeWidth = 32;
898  }
899
900  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
901    fromType = NVPTX::PTXLdStInstCode::Signed;
902  else if (ScalarVT.isFloatingPoint())
903    // f16 uses .b16 as its storage type.
904    fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
905                                             : NVPTX::PTXLdStInstCode::Float;
906  else
907    fromType = NVPTX::PTXLdStInstCode::Unsigned;
908
909  // Create the machine instruction DAG
910  SDValue Chain = N->getOperand(0);
911  SDValue N1 = N->getOperand(1);
912  SDValue Addr;
913  SDValue Offset, Base;
914  Optional<unsigned> Opcode;
915  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
916
917  if (SelectDirectAddr(N1, Addr)) {
918    Opcode = pickOpcodeForVT(
919        TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
920        NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
921        NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
922    if (!Opcode)
923      return false;
924    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
925                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
926                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
927    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
928                                     MVT::Other, Ops);
929  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
930                               : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
931    Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
932                                 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
933                                 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
934                                 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
935    if (!Opcode)
936      return false;
937    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
938                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
939                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
940    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
941                                     MVT::Other, Ops);
942  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
943                               : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
944    if (PointerSize == 64)
945      Opcode = pickOpcodeForVT(
946          TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
947          NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
948          NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
949    else
950      Opcode = pickOpcodeForVT(
951          TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
952          NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
953          NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
954    if (!Opcode)
955      return false;
956    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
957                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
958                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
959    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
960                                     MVT::Other, Ops);
961  } else {
962    if (PointerSize == 64)
963      Opcode = pickOpcodeForVT(
964          TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
965          NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
966          NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
967          NVPTX::LD_f64_areg_64);
968    else
969      Opcode = pickOpcodeForVT(
970          TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
971          NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
972          NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
973    if (!Opcode)
974      return false;
975    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
976                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
977                      getI32Imm(fromTypeWidth, dl), N1, Chain };
978    NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
979                                     MVT::Other, Ops);
980  }
981
982  if (!NVPTXLD)
983    return false;
984
985  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
986  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
987
988  ReplaceNode(N, NVPTXLD);
989  return true;
990}
991
992bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
993
994  SDValue Chain = N->getOperand(0);
995  SDValue Op1 = N->getOperand(1);
996  SDValue Addr, Offset, Base;
997  Optional<unsigned> Opcode;
998  SDLoc DL(N);
999  SDNode *LD;
1000  MemSDNode *MemSD = cast<MemSDNode>(N);
1001  EVT LoadedVT = MemSD->getMemoryVT();
1002
1003  if (!LoadedVT.isSimple())
1004    return false;
1005
1006  // Address Space Setting
1007  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1008  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1009    return tryLDGLDU(N);
1010  }
1011
1012  unsigned int PointerSize =
1013      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1014
1015  // Volatile Setting
1016  // - .volatile is only availalble for .global and .shared
1017  bool IsVolatile = MemSD->isVolatile();
1018  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1019      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1020      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1021    IsVolatile = false;
1022
1023  // Vector Setting
1024  MVT SimpleVT = LoadedVT.getSimpleVT();
1025
1026  // Type Setting: fromType + fromTypeWidth
1027  //
1028  // Sign   : ISD::SEXTLOAD
1029  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1030  //          type is integer
1031  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1032  MVT ScalarVT = SimpleVT.getScalarType();
1033  // Read at least 8 bits (predicates are stored as 8-bit values)
1034  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
1035  unsigned int FromType;
1036  // The last operand holds the original LoadSDNode::getExtensionType() value
1037  unsigned ExtensionType = cast<ConstantSDNode>(
1038      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1039  if (ExtensionType == ISD::SEXTLOAD)
1040    FromType = NVPTX::PTXLdStInstCode::Signed;
1041  else if (ScalarVT.isFloatingPoint())
1042    FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1043                                             : NVPTX::PTXLdStInstCode::Float;
1044  else
1045    FromType = NVPTX::PTXLdStInstCode::Unsigned;
1046
1047  unsigned VecType;
1048
1049  switch (N->getOpcode()) {
1050  case NVPTXISD::LoadV2:
1051    VecType = NVPTX::PTXLdStInstCode::V2;
1052    break;
1053  case NVPTXISD::LoadV4:
1054    VecType = NVPTX::PTXLdStInstCode::V4;
1055    break;
1056  default:
1057    return false;
1058  }
1059
1060  EVT EltVT = N->getValueType(0);
1061
1062  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1063  // instruction. Instead, we split the vector into v2f16 chunks and
1064  // load them with ld.v4.b32.
1065  if (EltVT == MVT::v2f16) {
1066    assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1067    EltVT = MVT::i32;
1068    FromType = NVPTX::PTXLdStInstCode::Untyped;
1069    FromTypeWidth = 32;
1070  }
1071
1072  if (SelectDirectAddr(Op1, Addr)) {
1073    switch (N->getOpcode()) {
1074    default:
1075      return false;
1076    case NVPTXISD::LoadV2:
1077      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1078                               NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1079                               NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1080                               NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1081                               NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1082      break;
1083    case NVPTXISD::LoadV4:
1084      Opcode =
1085          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1086                          NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1087                          NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1088                          NVPTX::LDV_f32_v4_avar, None);
1089      break;
1090    }
1091    if (!Opcode)
1092      return false;
1093    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1094                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1095                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
1096    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1097  } else if (PointerSize == 64
1098                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1099                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1100    switch (N->getOpcode()) {
1101    default:
1102      return false;
1103    case NVPTXISD::LoadV2:
1104      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1105                               NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1106                               NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1107                               NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1108                               NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1109      break;
1110    case NVPTXISD::LoadV4:
1111      Opcode =
1112          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1113                          NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1114                          NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1115                          NVPTX::LDV_f32_v4_asi, None);
1116      break;
1117    }
1118    if (!Opcode)
1119      return false;
1120    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1121                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1122                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1123    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1124  } else if (PointerSize == 64
1125                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1126                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1127    if (PointerSize == 64) {
1128      switch (N->getOpcode()) {
1129      default:
1130        return false;
1131      case NVPTXISD::LoadV2:
1132        Opcode = pickOpcodeForVT(
1133            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1134            NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1135            NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1136            NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1137            NVPTX::LDV_f64_v2_ari_64);
1138        break;
1139      case NVPTXISD::LoadV4:
1140        Opcode = pickOpcodeForVT(
1141            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1142            NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1143            NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1144            NVPTX::LDV_f32_v4_ari_64, None);
1145        break;
1146      }
1147    } else {
1148      switch (N->getOpcode()) {
1149      default:
1150        return false;
1151      case NVPTXISD::LoadV2:
1152        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1153                                 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1154                                 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1155                                 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1156                                 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1157        break;
1158      case NVPTXISD::LoadV4:
1159        Opcode =
1160            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1161                            NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1162                            NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1163                            NVPTX::LDV_f32_v4_ari, None);
1164        break;
1165      }
1166    }
1167    if (!Opcode)
1168      return false;
1169    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1170                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1171                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1172
1173    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1174  } else {
1175    if (PointerSize == 64) {
1176      switch (N->getOpcode()) {
1177      default:
1178        return false;
1179      case NVPTXISD::LoadV2:
1180        Opcode = pickOpcodeForVT(
1181            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1182            NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1183            NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1184            NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1185            NVPTX::LDV_f64_v2_areg_64);
1186        break;
1187      case NVPTXISD::LoadV4:
1188        Opcode = pickOpcodeForVT(
1189            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1190            NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1191            NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1192            NVPTX::LDV_f32_v4_areg_64, None);
1193        break;
1194      }
1195    } else {
1196      switch (N->getOpcode()) {
1197      default:
1198        return false;
1199      case NVPTXISD::LoadV2:
1200        Opcode =
1201            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1202                            NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1203                            NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1204                            NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1205                            NVPTX::LDV_f64_v2_areg);
1206        break;
1207      case NVPTXISD::LoadV4:
1208        Opcode = pickOpcodeForVT(
1209            EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1210            NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1211            NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1212            NVPTX::LDV_f32_v4_areg, None);
1213        break;
1214      }
1215    }
1216    if (!Opcode)
1217      return false;
1218    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1219                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1220                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
1221    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1222  }
1223
1224  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1225  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1226
1227  ReplaceNode(N, LD);
1228  return true;
1229}
1230
1231bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1232
1233  SDValue Chain = N->getOperand(0);
1234  SDValue Op1;
1235  MemSDNode *Mem;
1236  bool IsLDG = true;
1237
1238  // If this is an LDG intrinsic, the address is the third operand. If its an
1239  // LDG/LDU SD node (from custom vector handling), then its the second operand
1240  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1241    Op1 = N->getOperand(2);
1242    Mem = cast<MemIntrinsicSDNode>(N);
1243    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1244    switch (IID) {
1245    default:
1246      return false;
1247    case Intrinsic::nvvm_ldg_global_f:
1248    case Intrinsic::nvvm_ldg_global_i:
1249    case Intrinsic::nvvm_ldg_global_p:
1250      IsLDG = true;
1251      break;
1252    case Intrinsic::nvvm_ldu_global_f:
1253    case Intrinsic::nvvm_ldu_global_i:
1254    case Intrinsic::nvvm_ldu_global_p:
1255      IsLDG = false;
1256      break;
1257    }
1258  } else {
1259    Op1 = N->getOperand(1);
1260    Mem = cast<MemSDNode>(N);
1261  }
1262
1263  Optional<unsigned> Opcode;
1264  SDLoc DL(N);
1265  SDNode *LD;
1266  SDValue Base, Offset, Addr;
1267
1268  EVT EltVT = Mem->getMemoryVT();
1269  unsigned NumElts = 1;
1270  if (EltVT.isVector()) {
1271    NumElts = EltVT.getVectorNumElements();
1272    EltVT = EltVT.getVectorElementType();
1273    // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1274    if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
1275      assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1276      EltVT = MVT::v2f16;
1277      NumElts /= 2;
1278    }
1279  }
1280
1281  // Build the "promoted" result VTList for the load. If we are really loading
1282  // i8s, then the return type will be promoted to i16 since we do not expose
1283  // 8-bit registers in NVPTX.
1284  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1285  SmallVector<EVT, 5> InstVTs;
1286  for (unsigned i = 0; i != NumElts; ++i) {
1287    InstVTs.push_back(NodeVT);
1288  }
1289  InstVTs.push_back(MVT::Other);
1290  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1291
1292  if (SelectDirectAddr(Op1, Addr)) {
1293    switch (N->getOpcode()) {
1294    default:
1295      return false;
1296    case ISD::LOAD:
1297    case ISD::INTRINSIC_W_CHAIN:
1298      if (IsLDG)
1299        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1300                                     NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1301                                     NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1302                                     NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1303                                     NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1304                                     NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1305                                     NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1306                                     NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1307                                     NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1308      else
1309        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1310                                     NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1311                                     NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1312                                     NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1313                                     NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1314                                     NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1315                                     NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1316                                     NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1317                                     NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1318      break;
1319    case NVPTXISD::LoadV2:
1320    case NVPTXISD::LDGV2:
1321      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1322                                   NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1323                                   NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1324                                   NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1325                                   NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1326                                   NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1327                                   NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1328                                   NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1329                                   NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1330      break;
1331    case NVPTXISD::LDUV2:
1332      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1333                                   NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1334                                   NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1335                                   NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1336                                   NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1337                                   NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1338                                   NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1339                                   NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1340                                   NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1341      break;
1342    case NVPTXISD::LoadV4:
1343    case NVPTXISD::LDGV4:
1344      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345                               NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1346                               NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1347                               NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1348                               NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1349                               NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1350                               NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1351      break;
1352    case NVPTXISD::LDUV4:
1353      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1354                               NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1355                               NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1356                               NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1357                               NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1358                               NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1359                               NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1360      break;
1361    }
1362    if (!Opcode)
1363      return false;
1364    SDValue Ops[] = { Addr, Chain };
1365    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1366  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1367                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1368    if (TM.is64Bit()) {
1369      switch (N->getOpcode()) {
1370      default:
1371        return false;
1372      case ISD::LOAD:
1373      case ISD::INTRINSIC_W_CHAIN:
1374        if (IsLDG)
1375          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1376                                       NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1377                                       NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1378                                       NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1379                                       NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1380                                       NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1381                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1382                                       NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1383                                       NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1384        else
1385          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1386                                       NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1387                                       NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1388                                       NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1389                                       NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1390                                       NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1391                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1392                                       NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1393                                       NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1394        break;
1395      case NVPTXISD::LoadV2:
1396      case NVPTXISD::LDGV2:
1397        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1398                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1399                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1400                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1401                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1402                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1403                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1404                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1405                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1406        break;
1407      case NVPTXISD::LDUV2:
1408        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1409                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1410                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1411                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1412                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1413                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1414                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1415                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1416                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1417        break;
1418      case NVPTXISD::LoadV4:
1419      case NVPTXISD::LDGV4:
1420        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1421                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1422                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1423                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1424                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1425                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1426                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1427        break;
1428      case NVPTXISD::LDUV4:
1429        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1430                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1431                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1432                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1433                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1434                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1435                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1436        break;
1437      }
1438    } else {
1439      switch (N->getOpcode()) {
1440      default:
1441        return false;
1442      case ISD::LOAD:
1443      case ISD::INTRINSIC_W_CHAIN:
1444        if (IsLDG)
1445          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1446                                       NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1447                                       NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1448                                       NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1449                                       NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1450                                       NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1451                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1452                                       NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1453                                       NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1454        else
1455          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1456                                       NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1457                                       NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1458                                       NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1459                                       NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1460                                       NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1461                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1462                                       NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1463                                       NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1464        break;
1465      case NVPTXISD::LoadV2:
1466      case NVPTXISD::LDGV2:
1467        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1468                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1469                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1470                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1471                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1472                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1473                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1474                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1475                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1476        break;
1477      case NVPTXISD::LDUV2:
1478        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1479                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1480                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1481                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1482                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1483                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1484                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1485                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1486                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1487        break;
1488      case NVPTXISD::LoadV4:
1489      case NVPTXISD::LDGV4:
1490        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1491                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1492                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1493                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1494                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1495                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1496                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1497        break;
1498      case NVPTXISD::LDUV4:
1499        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1500                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1501                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1502                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1503                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1504                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1505                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1506        break;
1507      }
1508    }
1509    if (!Opcode)
1510      return false;
1511    SDValue Ops[] = {Base, Offset, Chain};
1512    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1513  } else {
1514    if (TM.is64Bit()) {
1515      switch (N->getOpcode()) {
1516      default:
1517        return false;
1518      case ISD::LOAD:
1519      case ISD::INTRINSIC_W_CHAIN:
1520        if (IsLDG)
1521          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1522                                       NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1523                                       NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1524                                       NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1525                                       NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1526                                       NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1527                                       NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1528                                       NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1529                                       NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1530        else
1531          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1532                                       NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1533                                       NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1534                                       NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1535                                       NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1536                                       NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1537                                       NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1538                                       NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1539                                       NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1540        break;
1541      case NVPTXISD::LoadV2:
1542      case NVPTXISD::LDGV2:
1543        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1544                                     NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1545                                     NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1546                                     NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1547                                     NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1548                                     NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1549                                     NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1550                                     NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1551                                     NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1552        break;
1553      case NVPTXISD::LDUV2:
1554        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1555                                     NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1556                                     NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1557                                     NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1558                                     NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1559                                     NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1560                                     NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1561                                     NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1562                                     NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1563        break;
1564      case NVPTXISD::LoadV4:
1565      case NVPTXISD::LDGV4:
1566        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1567                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1568                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1569                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1570                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1571                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1572                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1573        break;
1574      case NVPTXISD::LDUV4:
1575        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1576                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1577                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1578                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1579                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1580                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1581                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1582        break;
1583      }
1584    } else {
1585      switch (N->getOpcode()) {
1586      default:
1587        return false;
1588      case ISD::LOAD:
1589      case ISD::INTRINSIC_W_CHAIN:
1590        if (IsLDG)
1591          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1592                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1593                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1594                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1595                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1596                                   NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1597                                   NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1598                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1599                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1600        else
1601          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1602                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1603                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1604                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1605                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1606                                   NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1607                                   NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1608                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1609                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1610        break;
1611      case NVPTXISD::LoadV2:
1612      case NVPTXISD::LDGV2:
1613        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1614                                 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1615                                 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1616                                 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1617                                 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1618                                 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1619                                 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1620                                 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1621                                 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1622        break;
1623      case NVPTXISD::LDUV2:
1624        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1625                                 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1626                                 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1627                                 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1628                                 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1629                                 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1630                                 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1631                                 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1632                                 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1633        break;
1634      case NVPTXISD::LoadV4:
1635      case NVPTXISD::LDGV4:
1636        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1637                                 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1638                                 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1639                                 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1640                                 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1641                                 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1642                                 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1643        break;
1644      case NVPTXISD::LDUV4:
1645        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1646                                 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1647                                 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1648                                 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1649                                 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1650                                 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1651                                 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1652        break;
1653      }
1654    }
1655    if (!Opcode)
1656      return false;
1657    SDValue Ops[] = { Op1, Chain };
1658    LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1659  }
1660
1661  MachineMemOperand *MemRef = Mem->getMemOperand();
1662  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1663
1664  // For automatic generation of LDG (through SelectLoad[Vector], not the
1665  // intrinsics), we may have an extending load like:
1666  //
1667  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1668  //
1669  // In this case, the matching logic above will select a load for the original
1670  // memory type (in this case, i8) and our types will not match (the node needs
1671  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1672  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1673  // CVT instruction. Ptxas should clean up any redundancies here.
1674
1675  EVT OrigType = N->getValueType(0);
1676  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1677
1678  if (OrigType != EltVT && LdNode) {
1679    // We have an extending-load. The instruction we selected operates on the
1680    // smaller type, but the SDNode we are replacing has the larger type. We
1681    // need to emit a CVT to make the types match.
1682    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1683    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1684                                       EltVT.getSimpleVT(), IsSigned);
1685
1686    // For each output value, apply the manual sign/zero-extension and make sure
1687    // all users of the load go through that CVT.
1688    for (unsigned i = 0; i != NumElts; ++i) {
1689      SDValue Res(LD, i);
1690      SDValue OrigVal(N, i);
1691
1692      SDNode *CvtNode =
1693        CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1694                               CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1695                                                         DL, MVT::i32));
1696      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1697    }
1698  }
1699
1700  ReplaceNode(N, LD);
1701  return true;
1702}
1703
1704bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1705  SDLoc dl(N);
1706  MemSDNode *ST = cast<MemSDNode>(N);
1707  assert(ST->writeMem() && "Expected store");
1708  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1709  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1710  assert((PlainStore || AtomicStore) && "Expected store");
1711  EVT StoreVT = ST->getMemoryVT();
1712  SDNode *NVPTXST = nullptr;
1713
1714  // do not support pre/post inc/dec
1715  if (PlainStore && PlainStore->isIndexed())
1716    return false;
1717
1718  if (!StoreVT.isSimple())
1719    return false;
1720
1721  AtomicOrdering Ordering = ST->getOrdering();
1722  // In order to lower atomic loads with stronger guarantees we would need to
1723  // use store.release or insert fences. However these features were only added
1724  // with PTX ISA 6.0 / sm_70.
1725  // TODO: Check if we can actually use the new instructions and implement them.
1726  if (isStrongerThanMonotonic(Ordering))
1727    return false;
1728
1729  // Address Space Setting
1730  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1731  unsigned int PointerSize =
1732      CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
1733
1734  // Volatile Setting
1735  // - .volatile is only available for .global and .shared
1736  // - .volatile has the same memory synchronization semantics as .relaxed.sys
1737  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
1738  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1739      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1740      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1741    isVolatile = false;
1742
1743  // Vector Setting
1744  MVT SimpleVT = StoreVT.getSimpleVT();
1745  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1746
1747  // Type Setting: toType + toTypeWidth
1748  // - for integer type, always use 'u'
1749  //
1750  MVT ScalarVT = SimpleVT.getScalarType();
1751  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1752  if (SimpleVT.isVector()) {
1753    assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1754    // v2f16 is stored using st.b32
1755    toTypeWidth = 32;
1756  }
1757
1758  unsigned int toType;
1759  if (ScalarVT.isFloatingPoint())
1760    // f16 uses .b16 as its storage type.
1761    toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1762                                           : NVPTX::PTXLdStInstCode::Float;
1763  else
1764    toType = NVPTX::PTXLdStInstCode::Unsigned;
1765
1766  // Create the machine instruction DAG
1767  SDValue Chain = ST->getChain();
1768  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1769  SDValue BasePtr = ST->getBasePtr();
1770  SDValue Addr;
1771  SDValue Offset, Base;
1772  Optional<unsigned> Opcode;
1773  MVT::SimpleValueType SourceVT =
1774      Value.getNode()->getSimpleValueType(0).SimpleTy;
1775
1776  if (SelectDirectAddr(BasePtr, Addr)) {
1777    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1778                             NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1779                             NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1780                             NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1781    if (!Opcode)
1782      return false;
1783    SDValue Ops[] = {Value,
1784                     getI32Imm(isVolatile, dl),
1785                     getI32Imm(CodeAddrSpace, dl),
1786                     getI32Imm(vecType, dl),
1787                     getI32Imm(toType, dl),
1788                     getI32Imm(toTypeWidth, dl),
1789                     Addr,
1790                     Chain};
1791    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1792  } else if (PointerSize == 64
1793                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
1794                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
1795    Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1796                             NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1797                             NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1798                             NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1799    if (!Opcode)
1800      return false;
1801    SDValue Ops[] = {Value,
1802                     getI32Imm(isVolatile, dl),
1803                     getI32Imm(CodeAddrSpace, dl),
1804                     getI32Imm(vecType, dl),
1805                     getI32Imm(toType, dl),
1806                     getI32Imm(toTypeWidth, dl),
1807                     Base,
1808                     Offset,
1809                     Chain};
1810    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1811  } else if (PointerSize == 64
1812                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
1813                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
1814    if (PointerSize == 64)
1815      Opcode = pickOpcodeForVT(
1816          SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1817          NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1818          NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1819    else
1820      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1821                               NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1822                               NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1823                               NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1824    if (!Opcode)
1825      return false;
1826
1827    SDValue Ops[] = {Value,
1828                     getI32Imm(isVolatile, dl),
1829                     getI32Imm(CodeAddrSpace, dl),
1830                     getI32Imm(vecType, dl),
1831                     getI32Imm(toType, dl),
1832                     getI32Imm(toTypeWidth, dl),
1833                     Base,
1834                     Offset,
1835                     Chain};
1836    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1837  } else {
1838    if (PointerSize == 64)
1839      Opcode =
1840          pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1841                          NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1842                          NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1843                          NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1844    else
1845      Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1846                               NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1847                               NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1848                               NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1849    if (!Opcode)
1850      return false;
1851    SDValue Ops[] = {Value,
1852                     getI32Imm(isVolatile, dl),
1853                     getI32Imm(CodeAddrSpace, dl),
1854                     getI32Imm(vecType, dl),
1855                     getI32Imm(toType, dl),
1856                     getI32Imm(toTypeWidth, dl),
1857                     BasePtr,
1858                     Chain};
1859    NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1860  }
1861
1862  if (!NVPTXST)
1863    return false;
1864
1865  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1866  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
1867  ReplaceNode(N, NVPTXST);
1868  return true;
1869}
1870
1871bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1872  SDValue Chain = N->getOperand(0);
1873  SDValue Op1 = N->getOperand(1);
1874  SDValue Addr, Offset, Base;
1875  Optional<unsigned> Opcode;
1876  SDLoc DL(N);
1877  SDNode *ST;
1878  EVT EltVT = Op1.getValueType();
1879  MemSDNode *MemSD = cast<MemSDNode>(N);
1880  EVT StoreVT = MemSD->getMemoryVT();
1881
1882  // Address Space Setting
1883  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1884  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1885    report_fatal_error("Cannot store to pointer that points to constant "
1886                       "memory space");
1887  }
1888  unsigned int PointerSize =
1889      CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1890
1891  // Volatile Setting
1892  // - .volatile is only availalble for .global and .shared
1893  bool IsVolatile = MemSD->isVolatile();
1894  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1895      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1896      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1897    IsVolatile = false;
1898
1899  // Type Setting: toType + toTypeWidth
1900  // - for integer type, always use 'u'
1901  assert(StoreVT.isSimple() && "Store value is not simple");
1902  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1903  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1904  unsigned ToType;
1905  if (ScalarVT.isFloatingPoint())
1906    ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1907                                           : NVPTX::PTXLdStInstCode::Float;
1908  else
1909    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1910
1911  SmallVector<SDValue, 12> StOps;
1912  SDValue N2;
1913  unsigned VecType;
1914
1915  switch (N->getOpcode()) {
1916  case NVPTXISD::StoreV2:
1917    VecType = NVPTX::PTXLdStInstCode::V2;
1918    StOps.push_back(N->getOperand(1));
1919    StOps.push_back(N->getOperand(2));
1920    N2 = N->getOperand(3);
1921    break;
1922  case NVPTXISD::StoreV4:
1923    VecType = NVPTX::PTXLdStInstCode::V4;
1924    StOps.push_back(N->getOperand(1));
1925    StOps.push_back(N->getOperand(2));
1926    StOps.push_back(N->getOperand(3));
1927    StOps.push_back(N->getOperand(4));
1928    N2 = N->getOperand(5);
1929    break;
1930  default:
1931    return false;
1932  }
1933
1934  // v8f16 is a special case. PTX doesn't have st.v8.f16
1935  // instruction. Instead, we split the vector into v2f16 chunks and
1936  // store them with st.v4.b32.
1937  if (EltVT == MVT::v2f16) {
1938    assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1939    EltVT = MVT::i32;
1940    ToType = NVPTX::PTXLdStInstCode::Untyped;
1941    ToTypeWidth = 32;
1942  }
1943
1944  StOps.push_back(getI32Imm(IsVolatile, DL));
1945  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1946  StOps.push_back(getI32Imm(VecType, DL));
1947  StOps.push_back(getI32Imm(ToType, DL));
1948  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1949
1950  if (SelectDirectAddr(N2, Addr)) {
1951    switch (N->getOpcode()) {
1952    default:
1953      return false;
1954    case NVPTXISD::StoreV2:
1955      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1956                               NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1957                               NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1958                               NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1959                               NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1960      break;
1961    case NVPTXISD::StoreV4:
1962      Opcode =
1963          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1964                          NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1965                          NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1966                          NVPTX::STV_f32_v4_avar, None);
1967      break;
1968    }
1969    StOps.push_back(Addr);
1970  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1971                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1972    switch (N->getOpcode()) {
1973    default:
1974      return false;
1975    case NVPTXISD::StoreV2:
1976      Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1977                               NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1978                               NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1979                               NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1980                               NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1981      break;
1982    case NVPTXISD::StoreV4:
1983      Opcode =
1984          pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1985                          NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1986                          NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1987                          NVPTX::STV_f32_v4_asi, None);
1988      break;
1989    }
1990    StOps.push_back(Base);
1991    StOps.push_back(Offset);
1992  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1993                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1994    if (PointerSize == 64) {
1995      switch (N->getOpcode()) {
1996      default:
1997        return false;
1998      case NVPTXISD::StoreV2:
1999        Opcode = pickOpcodeForVT(
2000            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
2001            NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2002            NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2003            NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2004            NVPTX::STV_f64_v2_ari_64);
2005        break;
2006      case NVPTXISD::StoreV4:
2007        Opcode = pickOpcodeForVT(
2008            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2009            NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2010            NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2011            NVPTX::STV_f32_v4_ari_64, None);
2012        break;
2013      }
2014    } else {
2015      switch (N->getOpcode()) {
2016      default:
2017        return false;
2018      case NVPTXISD::StoreV2:
2019        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2020                                 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2021                                 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2022                                 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2023                                 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2024        break;
2025      case NVPTXISD::StoreV4:
2026        Opcode =
2027            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2028                            NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2029                            NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2030                            NVPTX::STV_f32_v4_ari, None);
2031        break;
2032      }
2033    }
2034    StOps.push_back(Base);
2035    StOps.push_back(Offset);
2036  } else {
2037    if (PointerSize == 64) {
2038      switch (N->getOpcode()) {
2039      default:
2040        return false;
2041      case NVPTXISD::StoreV2:
2042        Opcode = pickOpcodeForVT(
2043            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2044            NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2045            NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2046            NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2047            NVPTX::STV_f64_v2_areg_64);
2048        break;
2049      case NVPTXISD::StoreV4:
2050        Opcode = pickOpcodeForVT(
2051            EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2052            NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2053            NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2054            NVPTX::STV_f32_v4_areg_64, None);
2055        break;
2056      }
2057    } else {
2058      switch (N->getOpcode()) {
2059      default:
2060        return false;
2061      case NVPTXISD::StoreV2:
2062        Opcode =
2063            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2064                            NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2065                            NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2066                            NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2067                            NVPTX::STV_f64_v2_areg);
2068        break;
2069      case NVPTXISD::StoreV4:
2070        Opcode =
2071            pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2072                            NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2073                            NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2074                            NVPTX::STV_f32_v4_areg, None);
2075        break;
2076      }
2077    }
2078    StOps.push_back(N2);
2079  }
2080
2081  if (!Opcode)
2082    return false;
2083
2084  StOps.push_back(Chain);
2085
2086  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2087
2088  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2089  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2090
2091  ReplaceNode(N, ST);
2092  return true;
2093}
2094
2095bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2096  SDValue Chain = Node->getOperand(0);
2097  SDValue Offset = Node->getOperand(2);
2098  SDValue Flag = Node->getOperand(3);
2099  SDLoc DL(Node);
2100  MemSDNode *Mem = cast<MemSDNode>(Node);
2101
2102  unsigned VecSize;
2103  switch (Node->getOpcode()) {
2104  default:
2105    return false;
2106  case NVPTXISD::LoadParam:
2107    VecSize = 1;
2108    break;
2109  case NVPTXISD::LoadParamV2:
2110    VecSize = 2;
2111    break;
2112  case NVPTXISD::LoadParamV4:
2113    VecSize = 4;
2114    break;
2115  }
2116
2117  EVT EltVT = Node->getValueType(0);
2118  EVT MemVT = Mem->getMemoryVT();
2119
2120  Optional<unsigned> Opcode;
2121
2122  switch (VecSize) {
2123  default:
2124    return false;
2125  case 1:
2126    Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2127                             NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2128                             NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2129                             NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2130                             NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2131    break;
2132  case 2:
2133    Opcode =
2134        pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2135                        NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2136                        NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2137                        NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2138                        NVPTX::LoadParamMemV2F64);
2139    break;
2140  case 4:
2141    Opcode = pickOpcodeForVT(
2142        MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2143        NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2144        NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2145        NVPTX::LoadParamMemV4F32, None);
2146    break;
2147  }
2148  if (!Opcode)
2149    return false;
2150
2151  SDVTList VTs;
2152  if (VecSize == 1) {
2153    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2154  } else if (VecSize == 2) {
2155    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2156  } else {
2157    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2158    VTs = CurDAG->getVTList(EVTs);
2159  }
2160
2161  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2162
2163  SmallVector<SDValue, 2> Ops;
2164  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2165  Ops.push_back(Chain);
2166  Ops.push_back(Flag);
2167
2168  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2169  return true;
2170}
2171
2172bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2173  SDLoc DL(N);
2174  SDValue Chain = N->getOperand(0);
2175  SDValue Offset = N->getOperand(1);
2176  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2177  MemSDNode *Mem = cast<MemSDNode>(N);
2178
2179  // How many elements do we have?
2180  unsigned NumElts = 1;
2181  switch (N->getOpcode()) {
2182  default:
2183    return false;
2184  case NVPTXISD::StoreRetval:
2185    NumElts = 1;
2186    break;
2187  case NVPTXISD::StoreRetvalV2:
2188    NumElts = 2;
2189    break;
2190  case NVPTXISD::StoreRetvalV4:
2191    NumElts = 4;
2192    break;
2193  }
2194
2195  // Build vector of operands
2196  SmallVector<SDValue, 6> Ops;
2197  for (unsigned i = 0; i < NumElts; ++i)
2198    Ops.push_back(N->getOperand(i + 2));
2199  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2200  Ops.push_back(Chain);
2201
2202  // Determine target opcode
2203  // If we have an i1, use an 8-bit store. The lowering code in
2204  // NVPTXISelLowering will have already emitted an upcast.
2205  Optional<unsigned> Opcode = 0;
2206  switch (NumElts) {
2207  default:
2208    return false;
2209  case 1:
2210    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2211                             NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2212                             NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2213                             NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2214                             NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2215    break;
2216  case 2:
2217    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2218                             NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2219                             NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2220                             NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2221                             NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2222    break;
2223  case 4:
2224    Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2225                             NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2226                             NVPTX::StoreRetvalV4I32, None,
2227                             NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2228                             NVPTX::StoreRetvalV4F32, None);
2229    break;
2230  }
2231  if (!Opcode)
2232    return false;
2233
2234  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2235  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2236  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2237
2238  ReplaceNode(N, Ret);
2239  return true;
2240}
2241
2242bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2243  SDLoc DL(N);
2244  SDValue Chain = N->getOperand(0);
2245  SDValue Param = N->getOperand(1);
2246  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2247  SDValue Offset = N->getOperand(2);
2248  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2249  MemSDNode *Mem = cast<MemSDNode>(N);
2250  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2251
2252  // How many elements do we have?
2253  unsigned NumElts = 1;
2254  switch (N->getOpcode()) {
2255  default:
2256    return false;
2257  case NVPTXISD::StoreParamU32:
2258  case NVPTXISD::StoreParamS32:
2259  case NVPTXISD::StoreParam:
2260    NumElts = 1;
2261    break;
2262  case NVPTXISD::StoreParamV2:
2263    NumElts = 2;
2264    break;
2265  case NVPTXISD::StoreParamV4:
2266    NumElts = 4;
2267    break;
2268  }
2269
2270  // Build vector of operands
2271  SmallVector<SDValue, 8> Ops;
2272  for (unsigned i = 0; i < NumElts; ++i)
2273    Ops.push_back(N->getOperand(i + 3));
2274  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2275  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2276  Ops.push_back(Chain);
2277  Ops.push_back(Flag);
2278
2279  // Determine target opcode
2280  // If we have an i1, use an 8-bit store. The lowering code in
2281  // NVPTXISelLowering will have already emitted an upcast.
2282  Optional<unsigned> Opcode = 0;
2283  switch (N->getOpcode()) {
2284  default:
2285    switch (NumElts) {
2286    default:
2287      return false;
2288    case 1:
2289      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2290                               NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2291                               NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2292                               NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2293                               NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2294      break;
2295    case 2:
2296      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2297                               NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2298                               NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2299                               NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2300                               NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2301      break;
2302    case 4:
2303      Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2304                               NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2305                               NVPTX::StoreParamV4I32, None,
2306                               NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2307                               NVPTX::StoreParamV4F32, None);
2308      break;
2309    }
2310    if (!Opcode)
2311      return false;
2312    break;
2313  // Special case: if we have a sign-extend/zero-extend node, insert the
2314  // conversion instruction first, and use that as the value operand to
2315  // the selected StoreParam node.
2316  case NVPTXISD::StoreParamU32: {
2317    Opcode = NVPTX::StoreParamI32;
2318    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2319                                                MVT::i32);
2320    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2321                                         MVT::i32, Ops[0], CvtNone);
2322    Ops[0] = SDValue(Cvt, 0);
2323    break;
2324  }
2325  case NVPTXISD::StoreParamS32: {
2326    Opcode = NVPTX::StoreParamI32;
2327    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2328                                                MVT::i32);
2329    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2330                                         MVT::i32, Ops[0], CvtNone);
2331    Ops[0] = SDValue(Cvt, 0);
2332    break;
2333  }
2334  }
2335
2336  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2337  SDNode *Ret =
2338      CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2339  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2340  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2341
2342  ReplaceNode(N, Ret);
2343  return true;
2344}
2345
2346bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2347  unsigned Opc = 0;
2348
2349  switch (N->getOpcode()) {
2350  default: return false;
2351  case NVPTXISD::Tex1DFloatS32:
2352    Opc = NVPTX::TEX_1D_F32_S32;
2353    break;
2354  case NVPTXISD::Tex1DFloatFloat:
2355    Opc = NVPTX::TEX_1D_F32_F32;
2356    break;
2357  case NVPTXISD::Tex1DFloatFloatLevel:
2358    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2359    break;
2360  case NVPTXISD::Tex1DFloatFloatGrad:
2361    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2362    break;
2363  case NVPTXISD::Tex1DS32S32:
2364    Opc = NVPTX::TEX_1D_S32_S32;
2365    break;
2366  case NVPTXISD::Tex1DS32Float:
2367    Opc = NVPTX::TEX_1D_S32_F32;
2368    break;
2369  case NVPTXISD::Tex1DS32FloatLevel:
2370    Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2371    break;
2372  case NVPTXISD::Tex1DS32FloatGrad:
2373    Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2374    break;
2375  case NVPTXISD::Tex1DU32S32:
2376    Opc = NVPTX::TEX_1D_U32_S32;
2377    break;
2378  case NVPTXISD::Tex1DU32Float:
2379    Opc = NVPTX::TEX_1D_U32_F32;
2380    break;
2381  case NVPTXISD::Tex1DU32FloatLevel:
2382    Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2383    break;
2384  case NVPTXISD::Tex1DU32FloatGrad:
2385    Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2386    break;
2387  case NVPTXISD::Tex1DArrayFloatS32:
2388    Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2389    break;
2390  case NVPTXISD::Tex1DArrayFloatFloat:
2391    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2392    break;
2393  case NVPTXISD::Tex1DArrayFloatFloatLevel:
2394    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2395    break;
2396  case NVPTXISD::Tex1DArrayFloatFloatGrad:
2397    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2398    break;
2399  case NVPTXISD::Tex1DArrayS32S32:
2400    Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2401    break;
2402  case NVPTXISD::Tex1DArrayS32Float:
2403    Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2404    break;
2405  case NVPTXISD::Tex1DArrayS32FloatLevel:
2406    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2407    break;
2408  case NVPTXISD::Tex1DArrayS32FloatGrad:
2409    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2410    break;
2411  case NVPTXISD::Tex1DArrayU32S32:
2412    Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2413    break;
2414  case NVPTXISD::Tex1DArrayU32Float:
2415    Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2416    break;
2417  case NVPTXISD::Tex1DArrayU32FloatLevel:
2418    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2419    break;
2420  case NVPTXISD::Tex1DArrayU32FloatGrad:
2421    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2422    break;
2423  case NVPTXISD::Tex2DFloatS32:
2424    Opc = NVPTX::TEX_2D_F32_S32;
2425    break;
2426  case NVPTXISD::Tex2DFloatFloat:
2427    Opc = NVPTX::TEX_2D_F32_F32;
2428    break;
2429  case NVPTXISD::Tex2DFloatFloatLevel:
2430    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2431    break;
2432  case NVPTXISD::Tex2DFloatFloatGrad:
2433    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2434    break;
2435  case NVPTXISD::Tex2DS32S32:
2436    Opc = NVPTX::TEX_2D_S32_S32;
2437    break;
2438  case NVPTXISD::Tex2DS32Float:
2439    Opc = NVPTX::TEX_2D_S32_F32;
2440    break;
2441  case NVPTXISD::Tex2DS32FloatLevel:
2442    Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2443    break;
2444  case NVPTXISD::Tex2DS32FloatGrad:
2445    Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2446    break;
2447  case NVPTXISD::Tex2DU32S32:
2448    Opc = NVPTX::TEX_2D_U32_S32;
2449    break;
2450  case NVPTXISD::Tex2DU32Float:
2451    Opc = NVPTX::TEX_2D_U32_F32;
2452    break;
2453  case NVPTXISD::Tex2DU32FloatLevel:
2454    Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2455    break;
2456  case NVPTXISD::Tex2DU32FloatGrad:
2457    Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2458    break;
2459  case NVPTXISD::Tex2DArrayFloatS32:
2460    Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2461    break;
2462  case NVPTXISD::Tex2DArrayFloatFloat:
2463    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2464    break;
2465  case NVPTXISD::Tex2DArrayFloatFloatLevel:
2466    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2467    break;
2468  case NVPTXISD::Tex2DArrayFloatFloatGrad:
2469    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2470    break;
2471  case NVPTXISD::Tex2DArrayS32S32:
2472    Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2473    break;
2474  case NVPTXISD::Tex2DArrayS32Float:
2475    Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2476    break;
2477  case NVPTXISD::Tex2DArrayS32FloatLevel:
2478    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2479    break;
2480  case NVPTXISD::Tex2DArrayS32FloatGrad:
2481    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2482    break;
2483  case NVPTXISD::Tex2DArrayU32S32:
2484    Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2485    break;
2486  case NVPTXISD::Tex2DArrayU32Float:
2487    Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2488    break;
2489  case NVPTXISD::Tex2DArrayU32FloatLevel:
2490    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2491    break;
2492  case NVPTXISD::Tex2DArrayU32FloatGrad:
2493    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2494    break;
2495  case NVPTXISD::Tex3DFloatS32:
2496    Opc = NVPTX::TEX_3D_F32_S32;
2497    break;
2498  case NVPTXISD::Tex3DFloatFloat:
2499    Opc = NVPTX::TEX_3D_F32_F32;
2500    break;
2501  case NVPTXISD::Tex3DFloatFloatLevel:
2502    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2503    break;
2504  case NVPTXISD::Tex3DFloatFloatGrad:
2505    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2506    break;
2507  case NVPTXISD::Tex3DS32S32:
2508    Opc = NVPTX::TEX_3D_S32_S32;
2509    break;
2510  case NVPTXISD::Tex3DS32Float:
2511    Opc = NVPTX::TEX_3D_S32_F32;
2512    break;
2513  case NVPTXISD::Tex3DS32FloatLevel:
2514    Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2515    break;
2516  case NVPTXISD::Tex3DS32FloatGrad:
2517    Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2518    break;
2519  case NVPTXISD::Tex3DU32S32:
2520    Opc = NVPTX::TEX_3D_U32_S32;
2521    break;
2522  case NVPTXISD::Tex3DU32Float:
2523    Opc = NVPTX::TEX_3D_U32_F32;
2524    break;
2525  case NVPTXISD::Tex3DU32FloatLevel:
2526    Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2527    break;
2528  case NVPTXISD::Tex3DU32FloatGrad:
2529    Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2530    break;
2531  case NVPTXISD::TexCubeFloatFloat:
2532    Opc = NVPTX::TEX_CUBE_F32_F32;
2533    break;
2534  case NVPTXISD::TexCubeFloatFloatLevel:
2535    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2536    break;
2537  case NVPTXISD::TexCubeS32Float:
2538    Opc = NVPTX::TEX_CUBE_S32_F32;
2539    break;
2540  case NVPTXISD::TexCubeS32FloatLevel:
2541    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2542    break;
2543  case NVPTXISD::TexCubeU32Float:
2544    Opc = NVPTX::TEX_CUBE_U32_F32;
2545    break;
2546  case NVPTXISD::TexCubeU32FloatLevel:
2547    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2548    break;
2549  case NVPTXISD::TexCubeArrayFloatFloat:
2550    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2551    break;
2552  case NVPTXISD::TexCubeArrayFloatFloatLevel:
2553    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2554    break;
2555  case NVPTXISD::TexCubeArrayS32Float:
2556    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2557    break;
2558  case NVPTXISD::TexCubeArrayS32FloatLevel:
2559    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2560    break;
2561  case NVPTXISD::TexCubeArrayU32Float:
2562    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2563    break;
2564  case NVPTXISD::TexCubeArrayU32FloatLevel:
2565    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2566    break;
2567  case NVPTXISD::Tld4R2DFloatFloat:
2568    Opc = NVPTX::TLD4_R_2D_F32_F32;
2569    break;
2570  case NVPTXISD::Tld4G2DFloatFloat:
2571    Opc = NVPTX::TLD4_G_2D_F32_F32;
2572    break;
2573  case NVPTXISD::Tld4B2DFloatFloat:
2574    Opc = NVPTX::TLD4_B_2D_F32_F32;
2575    break;
2576  case NVPTXISD::Tld4A2DFloatFloat:
2577    Opc = NVPTX::TLD4_A_2D_F32_F32;
2578    break;
2579  case NVPTXISD::Tld4R2DS64Float:
2580    Opc = NVPTX::TLD4_R_2D_S32_F32;
2581    break;
2582  case NVPTXISD::Tld4G2DS64Float:
2583    Opc = NVPTX::TLD4_G_2D_S32_F32;
2584    break;
2585  case NVPTXISD::Tld4B2DS64Float:
2586    Opc = NVPTX::TLD4_B_2D_S32_F32;
2587    break;
2588  case NVPTXISD::Tld4A2DS64Float:
2589    Opc = NVPTX::TLD4_A_2D_S32_F32;
2590    break;
2591  case NVPTXISD::Tld4R2DU64Float:
2592    Opc = NVPTX::TLD4_R_2D_U32_F32;
2593    break;
2594  case NVPTXISD::Tld4G2DU64Float:
2595    Opc = NVPTX::TLD4_G_2D_U32_F32;
2596    break;
2597  case NVPTXISD::Tld4B2DU64Float:
2598    Opc = NVPTX::TLD4_B_2D_U32_F32;
2599    break;
2600  case NVPTXISD::Tld4A2DU64Float:
2601    Opc = NVPTX::TLD4_A_2D_U32_F32;
2602    break;
2603  case NVPTXISD::TexUnified1DFloatS32:
2604    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2605    break;
2606  case NVPTXISD::TexUnified1DFloatFloat:
2607    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2608    break;
2609  case NVPTXISD::TexUnified1DFloatFloatLevel:
2610    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2611    break;
2612  case NVPTXISD::TexUnified1DFloatFloatGrad:
2613    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2614    break;
2615  case NVPTXISD::TexUnified1DS32S32:
2616    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2617    break;
2618  case NVPTXISD::TexUnified1DS32Float:
2619    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2620    break;
2621  case NVPTXISD::TexUnified1DS32FloatLevel:
2622    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2623    break;
2624  case NVPTXISD::TexUnified1DS32FloatGrad:
2625    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2626    break;
2627  case NVPTXISD::TexUnified1DU32S32:
2628    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2629    break;
2630  case NVPTXISD::TexUnified1DU32Float:
2631    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2632    break;
2633  case NVPTXISD::TexUnified1DU32FloatLevel:
2634    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2635    break;
2636  case NVPTXISD::TexUnified1DU32FloatGrad:
2637    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2638    break;
2639  case NVPTXISD::TexUnified1DArrayFloatS32:
2640    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2641    break;
2642  case NVPTXISD::TexUnified1DArrayFloatFloat:
2643    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2644    break;
2645  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2646    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2647    break;
2648  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2649    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2650    break;
2651  case NVPTXISD::TexUnified1DArrayS32S32:
2652    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2653    break;
2654  case NVPTXISD::TexUnified1DArrayS32Float:
2655    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2656    break;
2657  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2658    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2659    break;
2660  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2661    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2662    break;
2663  case NVPTXISD::TexUnified1DArrayU32S32:
2664    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2665    break;
2666  case NVPTXISD::TexUnified1DArrayU32Float:
2667    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2668    break;
2669  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2670    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2671    break;
2672  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2673    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2674    break;
2675  case NVPTXISD::TexUnified2DFloatS32:
2676    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2677    break;
2678  case NVPTXISD::TexUnified2DFloatFloat:
2679    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2680    break;
2681  case NVPTXISD::TexUnified2DFloatFloatLevel:
2682    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2683    break;
2684  case NVPTXISD::TexUnified2DFloatFloatGrad:
2685    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2686    break;
2687  case NVPTXISD::TexUnified2DS32S32:
2688    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2689    break;
2690  case NVPTXISD::TexUnified2DS32Float:
2691    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2692    break;
2693  case NVPTXISD::TexUnified2DS32FloatLevel:
2694    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2695    break;
2696  case NVPTXISD::TexUnified2DS32FloatGrad:
2697    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2698    break;
2699  case NVPTXISD::TexUnified2DU32S32:
2700    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2701    break;
2702  case NVPTXISD::TexUnified2DU32Float:
2703    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2704    break;
2705  case NVPTXISD::TexUnified2DU32FloatLevel:
2706    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2707    break;
2708  case NVPTXISD::TexUnified2DU32FloatGrad:
2709    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2710    break;
2711  case NVPTXISD::TexUnified2DArrayFloatS32:
2712    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2713    break;
2714  case NVPTXISD::TexUnified2DArrayFloatFloat:
2715    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2716    break;
2717  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2718    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2719    break;
2720  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2721    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2722    break;
2723  case NVPTXISD::TexUnified2DArrayS32S32:
2724    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2725    break;
2726  case NVPTXISD::TexUnified2DArrayS32Float:
2727    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2728    break;
2729  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2730    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2731    break;
2732  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2733    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2734    break;
2735  case NVPTXISD::TexUnified2DArrayU32S32:
2736    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2737    break;
2738  case NVPTXISD::TexUnified2DArrayU32Float:
2739    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2740    break;
2741  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2742    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2743    break;
2744  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2745    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2746    break;
2747  case NVPTXISD::TexUnified3DFloatS32:
2748    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2749    break;
2750  case NVPTXISD::TexUnified3DFloatFloat:
2751    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2752    break;
2753  case NVPTXISD::TexUnified3DFloatFloatLevel:
2754    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2755    break;
2756  case NVPTXISD::TexUnified3DFloatFloatGrad:
2757    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2758    break;
2759  case NVPTXISD::TexUnified3DS32S32:
2760    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2761    break;
2762  case NVPTXISD::TexUnified3DS32Float:
2763    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2764    break;
2765  case NVPTXISD::TexUnified3DS32FloatLevel:
2766    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2767    break;
2768  case NVPTXISD::TexUnified3DS32FloatGrad:
2769    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2770    break;
2771  case NVPTXISD::TexUnified3DU32S32:
2772    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2773    break;
2774  case NVPTXISD::TexUnified3DU32Float:
2775    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2776    break;
2777  case NVPTXISD::TexUnified3DU32FloatLevel:
2778    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2779    break;
2780  case NVPTXISD::TexUnified3DU32FloatGrad:
2781    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2782    break;
2783  case NVPTXISD::TexUnifiedCubeFloatFloat:
2784    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2785    break;
2786  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2787    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2788    break;
2789  case NVPTXISD::TexUnifiedCubeS32Float:
2790    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2791    break;
2792  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2793    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2794    break;
2795  case NVPTXISD::TexUnifiedCubeU32Float:
2796    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2797    break;
2798  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2799    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2800    break;
2801  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2802    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2803    break;
2804  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2805    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2806    break;
2807  case NVPTXISD::TexUnifiedCubeArrayS32Float:
2808    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2809    break;
2810  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2811    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2812    break;
2813  case NVPTXISD::TexUnifiedCubeArrayU32Float:
2814    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2815    break;
2816  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2817    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2818    break;
2819  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2820    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2821    break;
2822  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2823    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2824    break;
2825  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2826    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2827    break;
2828  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2829    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2830    break;
2831  case NVPTXISD::Tld4UnifiedR2DS64Float:
2832    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2833    break;
2834  case NVPTXISD::Tld4UnifiedG2DS64Float:
2835    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2836    break;
2837  case NVPTXISD::Tld4UnifiedB2DS64Float:
2838    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2839    break;
2840  case NVPTXISD::Tld4UnifiedA2DS64Float:
2841    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2842    break;
2843  case NVPTXISD::Tld4UnifiedR2DU64Float:
2844    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2845    break;
2846  case NVPTXISD::Tld4UnifiedG2DU64Float:
2847    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2848    break;
2849  case NVPTXISD::Tld4UnifiedB2DU64Float:
2850    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2851    break;
2852  case NVPTXISD::Tld4UnifiedA2DU64Float:
2853    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2854    break;
2855  }
2856
2857  // Copy over operands
2858  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2859  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2860
2861  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2862  return true;
2863}
2864
2865bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2866  unsigned Opc = 0;
2867  switch (N->getOpcode()) {
2868  default: return false;
2869  case NVPTXISD::Suld1DI8Clamp:
2870    Opc = NVPTX::SULD_1D_I8_CLAMP;
2871    break;
2872  case NVPTXISD::Suld1DI16Clamp:
2873    Opc = NVPTX::SULD_1D_I16_CLAMP;
2874    break;
2875  case NVPTXISD::Suld1DI32Clamp:
2876    Opc = NVPTX::SULD_1D_I32_CLAMP;
2877    break;
2878  case NVPTXISD::Suld1DI64Clamp:
2879    Opc = NVPTX::SULD_1D_I64_CLAMP;
2880    break;
2881  case NVPTXISD::Suld1DV2I8Clamp:
2882    Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2883    break;
2884  case NVPTXISD::Suld1DV2I16Clamp:
2885    Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2886    break;
2887  case NVPTXISD::Suld1DV2I32Clamp:
2888    Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2889    break;
2890  case NVPTXISD::Suld1DV2I64Clamp:
2891    Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2892    break;
2893  case NVPTXISD::Suld1DV4I8Clamp:
2894    Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2895    break;
2896  case NVPTXISD::Suld1DV4I16Clamp:
2897    Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2898    break;
2899  case NVPTXISD::Suld1DV4I32Clamp:
2900    Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2901    break;
2902  case NVPTXISD::Suld1DArrayI8Clamp:
2903    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2904    break;
2905  case NVPTXISD::Suld1DArrayI16Clamp:
2906    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2907    break;
2908  case NVPTXISD::Suld1DArrayI32Clamp:
2909    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2910    break;
2911  case NVPTXISD::Suld1DArrayI64Clamp:
2912    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2913    break;
2914  case NVPTXISD::Suld1DArrayV2I8Clamp:
2915    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2916    break;
2917  case NVPTXISD::Suld1DArrayV2I16Clamp:
2918    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2919    break;
2920  case NVPTXISD::Suld1DArrayV2I32Clamp:
2921    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2922    break;
2923  case NVPTXISD::Suld1DArrayV2I64Clamp:
2924    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2925    break;
2926  case NVPTXISD::Suld1DArrayV4I8Clamp:
2927    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2928    break;
2929  case NVPTXISD::Suld1DArrayV4I16Clamp:
2930    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2931    break;
2932  case NVPTXISD::Suld1DArrayV4I32Clamp:
2933    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2934    break;
2935  case NVPTXISD::Suld2DI8Clamp:
2936    Opc = NVPTX::SULD_2D_I8_CLAMP;
2937    break;
2938  case NVPTXISD::Suld2DI16Clamp:
2939    Opc = NVPTX::SULD_2D_I16_CLAMP;
2940    break;
2941  case NVPTXISD::Suld2DI32Clamp:
2942    Opc = NVPTX::SULD_2D_I32_CLAMP;
2943    break;
2944  case NVPTXISD::Suld2DI64Clamp:
2945    Opc = NVPTX::SULD_2D_I64_CLAMP;
2946    break;
2947  case NVPTXISD::Suld2DV2I8Clamp:
2948    Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2949    break;
2950  case NVPTXISD::Suld2DV2I16Clamp:
2951    Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2952    break;
2953  case NVPTXISD::Suld2DV2I32Clamp:
2954    Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2955    break;
2956  case NVPTXISD::Suld2DV2I64Clamp:
2957    Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2958    break;
2959  case NVPTXISD::Suld2DV4I8Clamp:
2960    Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2961    break;
2962  case NVPTXISD::Suld2DV4I16Clamp:
2963    Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2964    break;
2965  case NVPTXISD::Suld2DV4I32Clamp:
2966    Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2967    break;
2968  case NVPTXISD::Suld2DArrayI8Clamp:
2969    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2970    break;
2971  case NVPTXISD::Suld2DArrayI16Clamp:
2972    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2973    break;
2974  case NVPTXISD::Suld2DArrayI32Clamp:
2975    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2976    break;
2977  case NVPTXISD::Suld2DArrayI64Clamp:
2978    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2979    break;
2980  case NVPTXISD::Suld2DArrayV2I8Clamp:
2981    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2982    break;
2983  case NVPTXISD::Suld2DArrayV2I16Clamp:
2984    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2985    break;
2986  case NVPTXISD::Suld2DArrayV2I32Clamp:
2987    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2988    break;
2989  case NVPTXISD::Suld2DArrayV2I64Clamp:
2990    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2991    break;
2992  case NVPTXISD::Suld2DArrayV4I8Clamp:
2993    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2994    break;
2995  case NVPTXISD::Suld2DArrayV4I16Clamp:
2996    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2997    break;
2998  case NVPTXISD::Suld2DArrayV4I32Clamp:
2999    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3000    break;
3001  case NVPTXISD::Suld3DI8Clamp:
3002    Opc = NVPTX::SULD_3D_I8_CLAMP;
3003    break;
3004  case NVPTXISD::Suld3DI16Clamp:
3005    Opc = NVPTX::SULD_3D_I16_CLAMP;
3006    break;
3007  case NVPTXISD::Suld3DI32Clamp:
3008    Opc = NVPTX::SULD_3D_I32_CLAMP;
3009    break;
3010  case NVPTXISD::Suld3DI64Clamp:
3011    Opc = NVPTX::SULD_3D_I64_CLAMP;
3012    break;
3013  case NVPTXISD::Suld3DV2I8Clamp:
3014    Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3015    break;
3016  case NVPTXISD::Suld3DV2I16Clamp:
3017    Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3018    break;
3019  case NVPTXISD::Suld3DV2I32Clamp:
3020    Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3021    break;
3022  case NVPTXISD::Suld3DV2I64Clamp:
3023    Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3024    break;
3025  case NVPTXISD::Suld3DV4I8Clamp:
3026    Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3027    break;
3028  case NVPTXISD::Suld3DV4I16Clamp:
3029    Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3030    break;
3031  case NVPTXISD::Suld3DV4I32Clamp:
3032    Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3033    break;
3034  case NVPTXISD::Suld1DI8Trap:
3035    Opc = NVPTX::SULD_1D_I8_TRAP;
3036    break;
3037  case NVPTXISD::Suld1DI16Trap:
3038    Opc = NVPTX::SULD_1D_I16_TRAP;
3039    break;
3040  case NVPTXISD::Suld1DI32Trap:
3041    Opc = NVPTX::SULD_1D_I32_TRAP;
3042    break;
3043  case NVPTXISD::Suld1DI64Trap:
3044    Opc = NVPTX::SULD_1D_I64_TRAP;
3045    break;
3046  case NVPTXISD::Suld1DV2I8Trap:
3047    Opc = NVPTX::SULD_1D_V2I8_TRAP;
3048    break;
3049  case NVPTXISD::Suld1DV2I16Trap:
3050    Opc = NVPTX::SULD_1D_V2I16_TRAP;
3051    break;
3052  case NVPTXISD::Suld1DV2I32Trap:
3053    Opc = NVPTX::SULD_1D_V2I32_TRAP;
3054    break;
3055  case NVPTXISD::Suld1DV2I64Trap:
3056    Opc = NVPTX::SULD_1D_V2I64_TRAP;
3057    break;
3058  case NVPTXISD::Suld1DV4I8Trap:
3059    Opc = NVPTX::SULD_1D_V4I8_TRAP;
3060    break;
3061  case NVPTXISD::Suld1DV4I16Trap:
3062    Opc = NVPTX::SULD_1D_V4I16_TRAP;
3063    break;
3064  case NVPTXISD::Suld1DV4I32Trap:
3065    Opc = NVPTX::SULD_1D_V4I32_TRAP;
3066    break;
3067  case NVPTXISD::Suld1DArrayI8Trap:
3068    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3069    break;
3070  case NVPTXISD::Suld1DArrayI16Trap:
3071    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3072    break;
3073  case NVPTXISD::Suld1DArrayI32Trap:
3074    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3075    break;
3076  case NVPTXISD::Suld1DArrayI64Trap:
3077    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3078    break;
3079  case NVPTXISD::Suld1DArrayV2I8Trap:
3080    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3081    break;
3082  case NVPTXISD::Suld1DArrayV2I16Trap:
3083    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3084    break;
3085  case NVPTXISD::Suld1DArrayV2I32Trap:
3086    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3087    break;
3088  case NVPTXISD::Suld1DArrayV2I64Trap:
3089    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3090    break;
3091  case NVPTXISD::Suld1DArrayV4I8Trap:
3092    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3093    break;
3094  case NVPTXISD::Suld1DArrayV4I16Trap:
3095    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3096    break;
3097  case NVPTXISD::Suld1DArrayV4I32Trap:
3098    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3099    break;
3100  case NVPTXISD::Suld2DI8Trap:
3101    Opc = NVPTX::SULD_2D_I8_TRAP;
3102    break;
3103  case NVPTXISD::Suld2DI16Trap:
3104    Opc = NVPTX::SULD_2D_I16_TRAP;
3105    break;
3106  case NVPTXISD::Suld2DI32Trap:
3107    Opc = NVPTX::SULD_2D_I32_TRAP;
3108    break;
3109  case NVPTXISD::Suld2DI64Trap:
3110    Opc = NVPTX::SULD_2D_I64_TRAP;
3111    break;
3112  case NVPTXISD::Suld2DV2I8Trap:
3113    Opc = NVPTX::SULD_2D_V2I8_TRAP;
3114    break;
3115  case NVPTXISD::Suld2DV2I16Trap:
3116    Opc = NVPTX::SULD_2D_V2I16_TRAP;
3117    break;
3118  case NVPTXISD::Suld2DV2I32Trap:
3119    Opc = NVPTX::SULD_2D_V2I32_TRAP;
3120    break;
3121  case NVPTXISD::Suld2DV2I64Trap:
3122    Opc = NVPTX::SULD_2D_V2I64_TRAP;
3123    break;
3124  case NVPTXISD::Suld2DV4I8Trap:
3125    Opc = NVPTX::SULD_2D_V4I8_TRAP;
3126    break;
3127  case NVPTXISD::Suld2DV4I16Trap:
3128    Opc = NVPTX::SULD_2D_V4I16_TRAP;
3129    break;
3130  case NVPTXISD::Suld2DV4I32Trap:
3131    Opc = NVPTX::SULD_2D_V4I32_TRAP;
3132    break;
3133  case NVPTXISD::Suld2DArrayI8Trap:
3134    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3135    break;
3136  case NVPTXISD::Suld2DArrayI16Trap:
3137    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3138    break;
3139  case NVPTXISD::Suld2DArrayI32Trap:
3140    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3141    break;
3142  case NVPTXISD::Suld2DArrayI64Trap:
3143    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3144    break;
3145  case NVPTXISD::Suld2DArrayV2I8Trap:
3146    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3147    break;
3148  case NVPTXISD::Suld2DArrayV2I16Trap:
3149    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3150    break;
3151  case NVPTXISD::Suld2DArrayV2I32Trap:
3152    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3153    break;
3154  case NVPTXISD::Suld2DArrayV2I64Trap:
3155    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3156    break;
3157  case NVPTXISD::Suld2DArrayV4I8Trap:
3158    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3159    break;
3160  case NVPTXISD::Suld2DArrayV4I16Trap:
3161    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3162    break;
3163  case NVPTXISD::Suld2DArrayV4I32Trap:
3164    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3165    break;
3166  case NVPTXISD::Suld3DI8Trap:
3167    Opc = NVPTX::SULD_3D_I8_TRAP;
3168    break;
3169  case NVPTXISD::Suld3DI16Trap:
3170    Opc = NVPTX::SULD_3D_I16_TRAP;
3171    break;
3172  case NVPTXISD::Suld3DI32Trap:
3173    Opc = NVPTX::SULD_3D_I32_TRAP;
3174    break;
3175  case NVPTXISD::Suld3DI64Trap:
3176    Opc = NVPTX::SULD_3D_I64_TRAP;
3177    break;
3178  case NVPTXISD::Suld3DV2I8Trap:
3179    Opc = NVPTX::SULD_3D_V2I8_TRAP;
3180    break;
3181  case NVPTXISD::Suld3DV2I16Trap:
3182    Opc = NVPTX::SULD_3D_V2I16_TRAP;
3183    break;
3184  case NVPTXISD::Suld3DV2I32Trap:
3185    Opc = NVPTX::SULD_3D_V2I32_TRAP;
3186    break;
3187  case NVPTXISD::Suld3DV2I64Trap:
3188    Opc = NVPTX::SULD_3D_V2I64_TRAP;
3189    break;
3190  case NVPTXISD::Suld3DV4I8Trap:
3191    Opc = NVPTX::SULD_3D_V4I8_TRAP;
3192    break;
3193  case NVPTXISD::Suld3DV4I16Trap:
3194    Opc = NVPTX::SULD_3D_V4I16_TRAP;
3195    break;
3196  case NVPTXISD::Suld3DV4I32Trap:
3197    Opc = NVPTX::SULD_3D_V4I32_TRAP;
3198    break;
3199  case NVPTXISD::Suld1DI8Zero:
3200    Opc = NVPTX::SULD_1D_I8_ZERO;
3201    break;
3202  case NVPTXISD::Suld1DI16Zero:
3203    Opc = NVPTX::SULD_1D_I16_ZERO;
3204    break;
3205  case NVPTXISD::Suld1DI32Zero:
3206    Opc = NVPTX::SULD_1D_I32_ZERO;
3207    break;
3208  case NVPTXISD::Suld1DI64Zero:
3209    Opc = NVPTX::SULD_1D_I64_ZERO;
3210    break;
3211  case NVPTXISD::Suld1DV2I8Zero:
3212    Opc = NVPTX::SULD_1D_V2I8_ZERO;
3213    break;
3214  case NVPTXISD::Suld1DV2I16Zero:
3215    Opc = NVPTX::SULD_1D_V2I16_ZERO;
3216    break;
3217  case NVPTXISD::Suld1DV2I32Zero:
3218    Opc = NVPTX::SULD_1D_V2I32_ZERO;
3219    break;
3220  case NVPTXISD::Suld1DV2I64Zero:
3221    Opc = NVPTX::SULD_1D_V2I64_ZERO;
3222    break;
3223  case NVPTXISD::Suld1DV4I8Zero:
3224    Opc = NVPTX::SULD_1D_V4I8_ZERO;
3225    break;
3226  case NVPTXISD::Suld1DV4I16Zero:
3227    Opc = NVPTX::SULD_1D_V4I16_ZERO;
3228    break;
3229  case NVPTXISD::Suld1DV4I32Zero:
3230    Opc = NVPTX::SULD_1D_V4I32_ZERO;
3231    break;
3232  case NVPTXISD::Suld1DArrayI8Zero:
3233    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3234    break;
3235  case NVPTXISD::Suld1DArrayI16Zero:
3236    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3237    break;
3238  case NVPTXISD::Suld1DArrayI32Zero:
3239    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3240    break;
3241  case NVPTXISD::Suld1DArrayI64Zero:
3242    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3243    break;
3244  case NVPTXISD::Suld1DArrayV2I8Zero:
3245    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3246    break;
3247  case NVPTXISD::Suld1DArrayV2I16Zero:
3248    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3249    break;
3250  case NVPTXISD::Suld1DArrayV2I32Zero:
3251    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3252    break;
3253  case NVPTXISD::Suld1DArrayV2I64Zero:
3254    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3255    break;
3256  case NVPTXISD::Suld1DArrayV4I8Zero:
3257    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3258    break;
3259  case NVPTXISD::Suld1DArrayV4I16Zero:
3260    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3261    break;
3262  case NVPTXISD::Suld1DArrayV4I32Zero:
3263    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3264    break;
3265  case NVPTXISD::Suld2DI8Zero:
3266    Opc = NVPTX::SULD_2D_I8_ZERO;
3267    break;
3268  case NVPTXISD::Suld2DI16Zero:
3269    Opc = NVPTX::SULD_2D_I16_ZERO;
3270    break;
3271  case NVPTXISD::Suld2DI32Zero:
3272    Opc = NVPTX::SULD_2D_I32_ZERO;
3273    break;
3274  case NVPTXISD::Suld2DI64Zero:
3275    Opc = NVPTX::SULD_2D_I64_ZERO;
3276    break;
3277  case NVPTXISD::Suld2DV2I8Zero:
3278    Opc = NVPTX::SULD_2D_V2I8_ZERO;
3279    break;
3280  case NVPTXISD::Suld2DV2I16Zero:
3281    Opc = NVPTX::SULD_2D_V2I16_ZERO;
3282    break;
3283  case NVPTXISD::Suld2DV2I32Zero:
3284    Opc = NVPTX::SULD_2D_V2I32_ZERO;
3285    break;
3286  case NVPTXISD::Suld2DV2I64Zero:
3287    Opc = NVPTX::SULD_2D_V2I64_ZERO;
3288    break;
3289  case NVPTXISD::Suld2DV4I8Zero:
3290    Opc = NVPTX::SULD_2D_V4I8_ZERO;
3291    break;
3292  case NVPTXISD::Suld2DV4I16Zero:
3293    Opc = NVPTX::SULD_2D_V4I16_ZERO;
3294    break;
3295  case NVPTXISD::Suld2DV4I32Zero:
3296    Opc = NVPTX::SULD_2D_V4I32_ZERO;
3297    break;
3298  case NVPTXISD::Suld2DArrayI8Zero:
3299    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3300    break;
3301  case NVPTXISD::Suld2DArrayI16Zero:
3302    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3303    break;
3304  case NVPTXISD::Suld2DArrayI32Zero:
3305    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3306    break;
3307  case NVPTXISD::Suld2DArrayI64Zero:
3308    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3309    break;
3310  case NVPTXISD::Suld2DArrayV2I8Zero:
3311    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3312    break;
3313  case NVPTXISD::Suld2DArrayV2I16Zero:
3314    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3315    break;
3316  case NVPTXISD::Suld2DArrayV2I32Zero:
3317    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3318    break;
3319  case NVPTXISD::Suld2DArrayV2I64Zero:
3320    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3321    break;
3322  case NVPTXISD::Suld2DArrayV4I8Zero:
3323    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3324    break;
3325  case NVPTXISD::Suld2DArrayV4I16Zero:
3326    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3327    break;
3328  case NVPTXISD::Suld2DArrayV4I32Zero:
3329    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3330    break;
3331  case NVPTXISD::Suld3DI8Zero:
3332    Opc = NVPTX::SULD_3D_I8_ZERO;
3333    break;
3334  case NVPTXISD::Suld3DI16Zero:
3335    Opc = NVPTX::SULD_3D_I16_ZERO;
3336    break;
3337  case NVPTXISD::Suld3DI32Zero:
3338    Opc = NVPTX::SULD_3D_I32_ZERO;
3339    break;
3340  case NVPTXISD::Suld3DI64Zero:
3341    Opc = NVPTX::SULD_3D_I64_ZERO;
3342    break;
3343  case NVPTXISD::Suld3DV2I8Zero:
3344    Opc = NVPTX::SULD_3D_V2I8_ZERO;
3345    break;
3346  case NVPTXISD::Suld3DV2I16Zero:
3347    Opc = NVPTX::SULD_3D_V2I16_ZERO;
3348    break;
3349  case NVPTXISD::Suld3DV2I32Zero:
3350    Opc = NVPTX::SULD_3D_V2I32_ZERO;
3351    break;
3352  case NVPTXISD::Suld3DV2I64Zero:
3353    Opc = NVPTX::SULD_3D_V2I64_ZERO;
3354    break;
3355  case NVPTXISD::Suld3DV4I8Zero:
3356    Opc = NVPTX::SULD_3D_V4I8_ZERO;
3357    break;
3358  case NVPTXISD::Suld3DV4I16Zero:
3359    Opc = NVPTX::SULD_3D_V4I16_ZERO;
3360    break;
3361  case NVPTXISD::Suld3DV4I32Zero:
3362    Opc = NVPTX::SULD_3D_V4I32_ZERO;
3363    break;
3364  }
3365
3366  // Copy over operands
3367  SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3368  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3369
3370  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3371  return true;
3372}
3373
3374
3375/// SelectBFE - Look for instruction sequences that can be made more efficient
3376/// by using the 'bfe' (bit-field extract) PTX instruction
3377bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3378  SDLoc DL(N);
3379  SDValue LHS = N->getOperand(0);
3380  SDValue RHS = N->getOperand(1);
3381  SDValue Len;
3382  SDValue Start;
3383  SDValue Val;
3384  bool IsSigned = false;
3385
3386  if (N->getOpcode() == ISD::AND) {
3387    // Canonicalize the operands
3388    // We want 'and %val, %mask'
3389    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3390      std::swap(LHS, RHS);
3391    }
3392
3393    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3394    if (!Mask) {
3395      // We need a constant mask on the RHS of the AND
3396      return false;
3397    }
3398
3399    // Extract the mask bits
3400    uint64_t MaskVal = Mask->getZExtValue();
3401    if (!isMask_64(MaskVal)) {
3402      // We *could* handle shifted masks here, but doing so would require an
3403      // 'and' operation to fix up the low-order bits so we would trade
3404      // shr+and for bfe+and, which has the same throughput
3405      return false;
3406    }
3407
3408    // How many bits are in our mask?
3409    uint64_t NumBits = countTrailingOnes(MaskVal);
3410    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3411
3412    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3413      // We have a 'srl/and' pair, extract the effective start bit and length
3414      Val = LHS.getNode()->getOperand(0);
3415      Start = LHS.getNode()->getOperand(1);
3416      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3417      if (StartConst) {
3418        uint64_t StartVal = StartConst->getZExtValue();
3419        // How many "good" bits do we have left?  "good" is defined here as bits
3420        // that exist in the original value, not shifted in.
3421        uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3422        if (NumBits > GoodBits) {
3423          // Do not handle the case where bits have been shifted in. In theory
3424          // we could handle this, but the cost is likely higher than just
3425          // emitting the srl/and pair.
3426          return false;
3427        }
3428        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3429      } else {
3430        // Do not handle the case where the shift amount (can be zero if no srl
3431        // was found) is not constant. We could handle this case, but it would
3432        // require run-time logic that would be more expensive than just
3433        // emitting the srl/and pair.
3434        return false;
3435      }
3436    } else {
3437      // Do not handle the case where the LHS of the and is not a shift. While
3438      // it would be trivial to handle this case, it would just transform
3439      // 'and' -> 'bfe', but 'and' has higher-throughput.
3440      return false;
3441    }
3442  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3443    if (LHS->getOpcode() == ISD::AND) {
3444      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3445      if (!ShiftCnst) {
3446        // Shift amount must be constant
3447        return false;
3448      }
3449
3450      uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3451
3452      SDValue AndLHS = LHS->getOperand(0);
3453      SDValue AndRHS = LHS->getOperand(1);
3454
3455      // Canonicalize the AND to have the mask on the RHS
3456      if (isa<ConstantSDNode>(AndLHS)) {
3457        std::swap(AndLHS, AndRHS);
3458      }
3459
3460      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3461      if (!MaskCnst) {
3462        // Mask must be constant
3463        return false;
3464      }
3465
3466      uint64_t MaskVal = MaskCnst->getZExtValue();
3467      uint64_t NumZeros;
3468      uint64_t NumBits;
3469      if (isMask_64(MaskVal)) {
3470        NumZeros = 0;
3471        // The number of bits in the result bitfield will be the number of
3472        // trailing ones (the AND) minus the number of bits we shift off
3473        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3474      } else if (isShiftedMask_64(MaskVal)) {
3475        NumZeros = countTrailingZeros(MaskVal);
3476        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3477        // The number of bits in the result bitfield will be the number of
3478        // trailing zeros plus the number of set bits in the mask minus the
3479        // number of bits we shift off
3480        NumBits = NumZeros + NumOnes - ShiftAmt;
3481      } else {
3482        // This is not a mask we can handle
3483        return false;
3484      }
3485
3486      if (ShiftAmt < NumZeros) {
3487        // Handling this case would require extra logic that would make this
3488        // transformation non-profitable
3489        return false;
3490      }
3491
3492      Val = AndLHS;
3493      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3494      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3495    } else if (LHS->getOpcode() == ISD::SHL) {
3496      // Here, we have a pattern like:
3497      //
3498      // (sra (shl val, NN), MM)
3499      // or
3500      // (srl (shl val, NN), MM)
3501      //
3502      // If MM >= NN, we can efficiently optimize this with bfe
3503      Val = LHS->getOperand(0);
3504
3505      SDValue ShlRHS = LHS->getOperand(1);
3506      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3507      if (!ShlCnst) {
3508        // Shift amount must be constant
3509        return false;
3510      }
3511      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3512
3513      SDValue ShrRHS = RHS;
3514      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3515      if (!ShrCnst) {
3516        // Shift amount must be constant
3517        return false;
3518      }
3519      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3520
3521      // To avoid extra codegen and be profitable, we need Outer >= Inner
3522      if (OuterShiftAmt < InnerShiftAmt) {
3523        return false;
3524      }
3525
3526      // If the outer shift is more than the type size, we have no bitfield to
3527      // extract (since we also check that the inner shift is <= the outer shift
3528      // then this also implies that the inner shift is < the type size)
3529      if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3530        return false;
3531      }
3532
3533      Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3534                                        MVT::i32);
3535      Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3536                                      DL, MVT::i32);
3537
3538      if (N->getOpcode() == ISD::SRA) {
3539        // If we have a arithmetic right shift, we need to use the signed bfe
3540        // variant
3541        IsSigned = true;
3542      }
3543    } else {
3544      // No can do...
3545      return false;
3546    }
3547  } else {
3548    // No can do...
3549    return false;
3550  }
3551
3552
3553  unsigned Opc;
3554  // For the BFE operations we form here from "and" and "srl", always use the
3555  // unsigned variants.
3556  if (Val.getValueType() == MVT::i32) {
3557    if (IsSigned) {
3558      Opc = NVPTX::BFE_S32rii;
3559    } else {
3560      Opc = NVPTX::BFE_U32rii;
3561    }
3562  } else if (Val.getValueType() == MVT::i64) {
3563    if (IsSigned) {
3564      Opc = NVPTX::BFE_S64rii;
3565    } else {
3566      Opc = NVPTX::BFE_U64rii;
3567    }
3568  } else {
3569    // We cannot handle this type
3570    return false;
3571  }
3572
3573  SDValue Ops[] = {
3574    Val, Start, Len
3575  };
3576
3577  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3578  return true;
3579}
3580
3581// SelectDirectAddr - Match a direct address for DAG.
3582// A direct address could be a globaladdress or externalsymbol.
3583bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3584  // Return true if TGA or ES.
3585  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3586      N.getOpcode() == ISD::TargetExternalSymbol) {
3587    Address = N;
3588    return true;
3589  }
3590  if (N.getOpcode() == NVPTXISD::Wrapper) {
3591    Address = N.getOperand(0);
3592    return true;
3593  }
3594  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3595  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3596    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3597        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3598        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3599      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3600  }
3601  return false;
3602}
3603
3604// symbol+offset
3605bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3606    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3607  if (Addr.getOpcode() == ISD::ADD) {
3608    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3609      SDValue base = Addr.getOperand(0);
3610      if (SelectDirectAddr(base, Base)) {
3611        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3612                                           mvt);
3613        return true;
3614      }
3615    }
3616  }
3617  return false;
3618}
3619
3620// symbol+offset
3621bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3622                                     SDValue &Base, SDValue &Offset) {
3623  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3624}
3625
3626// symbol+offset
3627bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3628                                       SDValue &Base, SDValue &Offset) {
3629  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3630}
3631
3632// register+offset
3633bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3634    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3635  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3636    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3637    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3638    return true;
3639  }
3640  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3641      Addr.getOpcode() == ISD::TargetGlobalAddress)
3642    return false; // direct calls.
3643
3644  if (Addr.getOpcode() == ISD::ADD) {
3645    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3646      return false;
3647    }
3648    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3649      if (FrameIndexSDNode *FIN =
3650              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3651        // Constant offset from frame ref.
3652        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3653      else
3654        Base = Addr.getOperand(0);
3655      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3656                                         mvt);
3657      return true;
3658    }
3659  }
3660  return false;
3661}
3662
3663// register+offset
3664bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3665                                     SDValue &Base, SDValue &Offset) {
3666  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3667}
3668
3669// register+offset
3670bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3671                                       SDValue &Base, SDValue &Offset) {
3672  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3673}
3674
3675bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3676                                                 unsigned int spN) const {
3677  const Value *Src = nullptr;
3678  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3679    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3680      return true;
3681    Src = mN->getMemOperand()->getValue();
3682  }
3683  if (!Src)
3684    return false;
3685  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3686    return (PT->getAddressSpace() == spN);
3687  return false;
3688}
3689
3690/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3691/// inline asm expressions.
3692bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3693    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3694  SDValue Op0, Op1;
3695  switch (ConstraintID) {
3696  default:
3697    return true;
3698  case InlineAsm::Constraint_m: // memory
3699    if (SelectDirectAddr(Op, Op0)) {
3700      OutOps.push_back(Op0);
3701      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3702      return false;
3703    }
3704    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3705      OutOps.push_back(Op0);
3706      OutOps.push_back(Op1);
3707      return false;
3708    }
3709    break;
3710  }
3711  return true;
3712}
3713
3714/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3715/// conversion from \p SrcTy to \p DestTy.
3716unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3717                                             bool IsSigned) {
3718  switch (SrcTy.SimpleTy) {
3719  default:
3720    llvm_unreachable("Unhandled source type");
3721  case MVT::i8:
3722    switch (DestTy.SimpleTy) {
3723    default:
3724      llvm_unreachable("Unhandled dest type");
3725    case MVT::i16:
3726      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3727    case MVT::i32:
3728      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3729    case MVT::i64:
3730      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3731    }
3732  case MVT::i16:
3733    switch (DestTy.SimpleTy) {
3734    default:
3735      llvm_unreachable("Unhandled dest type");
3736    case MVT::i8:
3737      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3738    case MVT::i32:
3739      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3740    case MVT::i64:
3741      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3742    }
3743  case MVT::i32:
3744    switch (DestTy.SimpleTy) {
3745    default:
3746      llvm_unreachable("Unhandled dest type");
3747    case MVT::i8:
3748      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3749    case MVT::i16:
3750      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3751    case MVT::i64:
3752      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3753    }
3754  case MVT::i64:
3755    switch (DestTy.SimpleTy) {
3756    default:
3757      llvm_unreachable("Unhandled dest type");
3758    case MVT::i8:
3759      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3760    case MVT::i16:
3761      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3762    case MVT::i32:
3763      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3764    }
3765  }
3766}
3767