// NVPTXISelLowering.h revision 360784
//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16
17#include "NVPTX.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLowering.h"
20
21namespace llvm {
22namespace NVPTXISD {
23enum NodeType : unsigned {
24  // Start the numbering from where ISD NodeType finishes.
25  FIRST_NUMBER = ISD::BUILTIN_OP_END,
26  Wrapper,
27  CALL,
28  RET_FLAG,
29  LOAD_PARAM,
30  DeclareParam,
31  DeclareScalarParam,
32  DeclareRetParam,
33  DeclareRet,
34  DeclareScalarRet,
35  PrintCall,
36  PrintConvergentCall,
37  PrintCallUni,
38  PrintConvergentCallUni,
39  CallArgBegin,
40  CallArg,
41  LastCallArg,
42  CallArgEnd,
43  CallVoid,
44  CallVal,
45  CallSymbol,
46  Prototype,
47  MoveParam,
48  PseudoUseParam,
49  RETURN,
50  CallSeqBegin,
51  CallSeqEnd,
52  CallPrototype,
53  ProxyReg,
54  FUN_SHFL_CLAMP,
55  FUN_SHFR_CLAMP,
56  MUL_WIDE_SIGNED,
57  MUL_WIDE_UNSIGNED,
58  IMAD,
59  SETP_F16X2,
60  Dummy,
61
62  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
63  LoadV4,
64  LDGV2, // LDG.v2
65  LDGV4, // LDG.v4
66  LDUV2, // LDU.v2
67  LDUV4, // LDU.v4
68  StoreV2,
69  StoreV4,
70  LoadParam,
71  LoadParamV2,
72  LoadParamV4,
73  StoreParam,
74  StoreParamV2,
75  StoreParamV4,
76  StoreParamS32, // to sext and store a <32bit value, not used currently
77  StoreParamU32, // to zext and store a <32bit value, not used currently
78  StoreRetval,
79  StoreRetvalV2,
80  StoreRetvalV4,
81
82  // Texture intrinsics
83  Tex1DFloatS32,
84  Tex1DFloatFloat,
85  Tex1DFloatFloatLevel,
86  Tex1DFloatFloatGrad,
87  Tex1DS32S32,
88  Tex1DS32Float,
89  Tex1DS32FloatLevel,
90  Tex1DS32FloatGrad,
91  Tex1DU32S32,
92  Tex1DU32Float,
93  Tex1DU32FloatLevel,
94  Tex1DU32FloatGrad,
95  Tex1DArrayFloatS32,
96  Tex1DArrayFloatFloat,
97  Tex1DArrayFloatFloatLevel,
98  Tex1DArrayFloatFloatGrad,
99  Tex1DArrayS32S32,
100  Tex1DArrayS32Float,
101  Tex1DArrayS32FloatLevel,
102  Tex1DArrayS32FloatGrad,
103  Tex1DArrayU32S32,
104  Tex1DArrayU32Float,
105  Tex1DArrayU32FloatLevel,
106  Tex1DArrayU32FloatGrad,
107  Tex2DFloatS32,
108  Tex2DFloatFloat,
109  Tex2DFloatFloatLevel,
110  Tex2DFloatFloatGrad,
111  Tex2DS32S32,
112  Tex2DS32Float,
113  Tex2DS32FloatLevel,
114  Tex2DS32FloatGrad,
115  Tex2DU32S32,
116  Tex2DU32Float,
117  Tex2DU32FloatLevel,
118  Tex2DU32FloatGrad,
119  Tex2DArrayFloatS32,
120  Tex2DArrayFloatFloat,
121  Tex2DArrayFloatFloatLevel,
122  Tex2DArrayFloatFloatGrad,
123  Tex2DArrayS32S32,
124  Tex2DArrayS32Float,
125  Tex2DArrayS32FloatLevel,
126  Tex2DArrayS32FloatGrad,
127  Tex2DArrayU32S32,
128  Tex2DArrayU32Float,
129  Tex2DArrayU32FloatLevel,
130  Tex2DArrayU32FloatGrad,
131  Tex3DFloatS32,
132  Tex3DFloatFloat,
133  Tex3DFloatFloatLevel,
134  Tex3DFloatFloatGrad,
135  Tex3DS32S32,
136  Tex3DS32Float,
137  Tex3DS32FloatLevel,
138  Tex3DS32FloatGrad,
139  Tex3DU32S32,
140  Tex3DU32Float,
141  Tex3DU32FloatLevel,
142  Tex3DU32FloatGrad,
143  TexCubeFloatFloat,
144  TexCubeFloatFloatLevel,
145  TexCubeS32Float,
146  TexCubeS32FloatLevel,
147  TexCubeU32Float,
148  TexCubeU32FloatLevel,
149  TexCubeArrayFloatFloat,
150  TexCubeArrayFloatFloatLevel,
151  TexCubeArrayS32Float,
152  TexCubeArrayS32FloatLevel,
153  TexCubeArrayU32Float,
154  TexCubeArrayU32FloatLevel,
155  Tld4R2DFloatFloat,
156  Tld4G2DFloatFloat,
157  Tld4B2DFloatFloat,
158  Tld4A2DFloatFloat,
159  Tld4R2DS64Float,
160  Tld4G2DS64Float,
161  Tld4B2DS64Float,
162  Tld4A2DS64Float,
163  Tld4R2DU64Float,
164  Tld4G2DU64Float,
165  Tld4B2DU64Float,
166  Tld4A2DU64Float,
167  TexUnified1DFloatS32,
168  TexUnified1DFloatFloat,
169  TexUnified1DFloatFloatLevel,
170  TexUnified1DFloatFloatGrad,
171  TexUnified1DS32S32,
172  TexUnified1DS32Float,
173  TexUnified1DS32FloatLevel,
174  TexUnified1DS32FloatGrad,
175  TexUnified1DU32S32,
176  TexUnified1DU32Float,
177  TexUnified1DU32FloatLevel,
178  TexUnified1DU32FloatGrad,
179  TexUnified1DArrayFloatS32,
180  TexUnified1DArrayFloatFloat,
181  TexUnified1DArrayFloatFloatLevel,
182  TexUnified1DArrayFloatFloatGrad,
183  TexUnified1DArrayS32S32,
184  TexUnified1DArrayS32Float,
185  TexUnified1DArrayS32FloatLevel,
186  TexUnified1DArrayS32FloatGrad,
187  TexUnified1DArrayU32S32,
188  TexUnified1DArrayU32Float,
189  TexUnified1DArrayU32FloatLevel,
190  TexUnified1DArrayU32FloatGrad,
191  TexUnified2DFloatS32,
192  TexUnified2DFloatFloat,
193  TexUnified2DFloatFloatLevel,
194  TexUnified2DFloatFloatGrad,
195  TexUnified2DS32S32,
196  TexUnified2DS32Float,
197  TexUnified2DS32FloatLevel,
198  TexUnified2DS32FloatGrad,
199  TexUnified2DU32S32,
200  TexUnified2DU32Float,
201  TexUnified2DU32FloatLevel,
202  TexUnified2DU32FloatGrad,
203  TexUnified2DArrayFloatS32,
204  TexUnified2DArrayFloatFloat,
205  TexUnified2DArrayFloatFloatLevel,
206  TexUnified2DArrayFloatFloatGrad,
207  TexUnified2DArrayS32S32,
208  TexUnified2DArrayS32Float,
209  TexUnified2DArrayS32FloatLevel,
210  TexUnified2DArrayS32FloatGrad,
211  TexUnified2DArrayU32S32,
212  TexUnified2DArrayU32Float,
213  TexUnified2DArrayU32FloatLevel,
214  TexUnified2DArrayU32FloatGrad,
215  TexUnified3DFloatS32,
216  TexUnified3DFloatFloat,
217  TexUnified3DFloatFloatLevel,
218  TexUnified3DFloatFloatGrad,
219  TexUnified3DS32S32,
220  TexUnified3DS32Float,
221  TexUnified3DS32FloatLevel,
222  TexUnified3DS32FloatGrad,
223  TexUnified3DU32S32,
224  TexUnified3DU32Float,
225  TexUnified3DU32FloatLevel,
226  TexUnified3DU32FloatGrad,
227  TexUnifiedCubeFloatFloat,
228  TexUnifiedCubeFloatFloatLevel,
229  TexUnifiedCubeS32Float,
230  TexUnifiedCubeS32FloatLevel,
231  TexUnifiedCubeU32Float,
232  TexUnifiedCubeU32FloatLevel,
233  TexUnifiedCubeArrayFloatFloat,
234  TexUnifiedCubeArrayFloatFloatLevel,
235  TexUnifiedCubeArrayS32Float,
236  TexUnifiedCubeArrayS32FloatLevel,
237  TexUnifiedCubeArrayU32Float,
238  TexUnifiedCubeArrayU32FloatLevel,
239  Tld4UnifiedR2DFloatFloat,
240  Tld4UnifiedG2DFloatFloat,
241  Tld4UnifiedB2DFloatFloat,
242  Tld4UnifiedA2DFloatFloat,
243  Tld4UnifiedR2DS64Float,
244  Tld4UnifiedG2DS64Float,
245  Tld4UnifiedB2DS64Float,
246  Tld4UnifiedA2DS64Float,
247  Tld4UnifiedR2DU64Float,
248  Tld4UnifiedG2DU64Float,
249  Tld4UnifiedB2DU64Float,
250  Tld4UnifiedA2DU64Float,
251
252  // Surface intrinsics
253  Suld1DI8Clamp,
254  Suld1DI16Clamp,
255  Suld1DI32Clamp,
256  Suld1DI64Clamp,
257  Suld1DV2I8Clamp,
258  Suld1DV2I16Clamp,
259  Suld1DV2I32Clamp,
260  Suld1DV2I64Clamp,
261  Suld1DV4I8Clamp,
262  Suld1DV4I16Clamp,
263  Suld1DV4I32Clamp,
264
265  Suld1DArrayI8Clamp,
266  Suld1DArrayI16Clamp,
267  Suld1DArrayI32Clamp,
268  Suld1DArrayI64Clamp,
269  Suld1DArrayV2I8Clamp,
270  Suld1DArrayV2I16Clamp,
271  Suld1DArrayV2I32Clamp,
272  Suld1DArrayV2I64Clamp,
273  Suld1DArrayV4I8Clamp,
274  Suld1DArrayV4I16Clamp,
275  Suld1DArrayV4I32Clamp,
276
277  Suld2DI8Clamp,
278  Suld2DI16Clamp,
279  Suld2DI32Clamp,
280  Suld2DI64Clamp,
281  Suld2DV2I8Clamp,
282  Suld2DV2I16Clamp,
283  Suld2DV2I32Clamp,
284  Suld2DV2I64Clamp,
285  Suld2DV4I8Clamp,
286  Suld2DV4I16Clamp,
287  Suld2DV4I32Clamp,
288
289  Suld2DArrayI8Clamp,
290  Suld2DArrayI16Clamp,
291  Suld2DArrayI32Clamp,
292  Suld2DArrayI64Clamp,
293  Suld2DArrayV2I8Clamp,
294  Suld2DArrayV2I16Clamp,
295  Suld2DArrayV2I32Clamp,
296  Suld2DArrayV2I64Clamp,
297  Suld2DArrayV4I8Clamp,
298  Suld2DArrayV4I16Clamp,
299  Suld2DArrayV4I32Clamp,
300
301  Suld3DI8Clamp,
302  Suld3DI16Clamp,
303  Suld3DI32Clamp,
304  Suld3DI64Clamp,
305  Suld3DV2I8Clamp,
306  Suld3DV2I16Clamp,
307  Suld3DV2I32Clamp,
308  Suld3DV2I64Clamp,
309  Suld3DV4I8Clamp,
310  Suld3DV4I16Clamp,
311  Suld3DV4I32Clamp,
312
313  Suld1DI8Trap,
314  Suld1DI16Trap,
315  Suld1DI32Trap,
316  Suld1DI64Trap,
317  Suld1DV2I8Trap,
318  Suld1DV2I16Trap,
319  Suld1DV2I32Trap,
320  Suld1DV2I64Trap,
321  Suld1DV4I8Trap,
322  Suld1DV4I16Trap,
323  Suld1DV4I32Trap,
324
325  Suld1DArrayI8Trap,
326  Suld1DArrayI16Trap,
327  Suld1DArrayI32Trap,
328  Suld1DArrayI64Trap,
329  Suld1DArrayV2I8Trap,
330  Suld1DArrayV2I16Trap,
331  Suld1DArrayV2I32Trap,
332  Suld1DArrayV2I64Trap,
333  Suld1DArrayV4I8Trap,
334  Suld1DArrayV4I16Trap,
335  Suld1DArrayV4I32Trap,
336
337  Suld2DI8Trap,
338  Suld2DI16Trap,
339  Suld2DI32Trap,
340  Suld2DI64Trap,
341  Suld2DV2I8Trap,
342  Suld2DV2I16Trap,
343  Suld2DV2I32Trap,
344  Suld2DV2I64Trap,
345  Suld2DV4I8Trap,
346  Suld2DV4I16Trap,
347  Suld2DV4I32Trap,
348
349  Suld2DArrayI8Trap,
350  Suld2DArrayI16Trap,
351  Suld2DArrayI32Trap,
352  Suld2DArrayI64Trap,
353  Suld2DArrayV2I8Trap,
354  Suld2DArrayV2I16Trap,
355  Suld2DArrayV2I32Trap,
356  Suld2DArrayV2I64Trap,
357  Suld2DArrayV4I8Trap,
358  Suld2DArrayV4I16Trap,
359  Suld2DArrayV4I32Trap,
360
361  Suld3DI8Trap,
362  Suld3DI16Trap,
363  Suld3DI32Trap,
364  Suld3DI64Trap,
365  Suld3DV2I8Trap,
366  Suld3DV2I16Trap,
367  Suld3DV2I32Trap,
368  Suld3DV2I64Trap,
369  Suld3DV4I8Trap,
370  Suld3DV4I16Trap,
371  Suld3DV4I32Trap,
372
373  Suld1DI8Zero,
374  Suld1DI16Zero,
375  Suld1DI32Zero,
376  Suld1DI64Zero,
377  Suld1DV2I8Zero,
378  Suld1DV2I16Zero,
379  Suld1DV2I32Zero,
380  Suld1DV2I64Zero,
381  Suld1DV4I8Zero,
382  Suld1DV4I16Zero,
383  Suld1DV4I32Zero,
384
385  Suld1DArrayI8Zero,
386  Suld1DArrayI16Zero,
387  Suld1DArrayI32Zero,
388  Suld1DArrayI64Zero,
389  Suld1DArrayV2I8Zero,
390  Suld1DArrayV2I16Zero,
391  Suld1DArrayV2I32Zero,
392  Suld1DArrayV2I64Zero,
393  Suld1DArrayV4I8Zero,
394  Suld1DArrayV4I16Zero,
395  Suld1DArrayV4I32Zero,
396
397  Suld2DI8Zero,
398  Suld2DI16Zero,
399  Suld2DI32Zero,
400  Suld2DI64Zero,
401  Suld2DV2I8Zero,
402  Suld2DV2I16Zero,
403  Suld2DV2I32Zero,
404  Suld2DV2I64Zero,
405  Suld2DV4I8Zero,
406  Suld2DV4I16Zero,
407  Suld2DV4I32Zero,
408
409  Suld2DArrayI8Zero,
410  Suld2DArrayI16Zero,
411  Suld2DArrayI32Zero,
412  Suld2DArrayI64Zero,
413  Suld2DArrayV2I8Zero,
414  Suld2DArrayV2I16Zero,
415  Suld2DArrayV2I32Zero,
416  Suld2DArrayV2I64Zero,
417  Suld2DArrayV4I8Zero,
418  Suld2DArrayV4I16Zero,
419  Suld2DArrayV4I32Zero,
420
421  Suld3DI8Zero,
422  Suld3DI16Zero,
423  Suld3DI32Zero,
424  Suld3DI64Zero,
425  Suld3DV2I8Zero,
426  Suld3DV2I16Zero,
427  Suld3DV2I32Zero,
428  Suld3DV2I64Zero,
429  Suld3DV4I8Zero,
430  Suld3DV4I16Zero,
431  Suld3DV4I32Zero
432};
433}
434
// Forward declaration: this header only refers to NVPTXSubtarget by
// reference, so the complete type is not required here.
class NVPTXSubtarget;
436
437//===--------------------------------------------------------------------===//
438// TargetLowering Implementation
439//===--------------------------------------------------------------------===//
440class NVPTXTargetLowering : public TargetLowering {
441public:
442  explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
443                               const NVPTXSubtarget &STI);
444  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
445
446  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
447
448  const char *getTargetNodeName(unsigned Opcode) const override;
449
450  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
451                          MachineFunction &MF,
452                          unsigned Intrinsic) const override;
453
454  /// isLegalAddressingMode - Return true if the addressing mode represented
455  /// by AM is legal for this target, for a load/store of the specified type
456  /// Used to guide target specific optimizations, like loop strength
457  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458  /// address mode (CodeGenPrepare.cpp)
459  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
460                             unsigned AS,
461                             Instruction *I = nullptr) const override;
462
463  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
464    // Truncating 64-bit to 32-bit is free in SASS.
465    if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
466      return false;
467    return SrcTy->getPrimitiveSizeInBits() == 64 &&
468           DstTy->getPrimitiveSizeInBits() == 32;
469  }
470
471  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
472                         EVT VT) const override {
473    if (VT.isVector())
474      return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
475    return MVT::i1;
476  }
477
478  ConstraintType getConstraintType(StringRef Constraint) const override;
479  std::pair<unsigned, const TargetRegisterClass *>
480  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
481                               StringRef Constraint, MVT VT) const override;
482
483  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
484                               bool isVarArg,
485                               const SmallVectorImpl<ISD::InputArg> &Ins,
486                               const SDLoc &dl, SelectionDAG &DAG,
487                               SmallVectorImpl<SDValue> &InVals) const override;
488
489  SDValue LowerCall(CallLoweringInfo &CLI,
490                    SmallVectorImpl<SDValue> &InVals) const override;
491
492  std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
493                           const SmallVectorImpl<ISD::OutputArg> &,
494                           unsigned retAlignment,
495                           ImmutableCallSite CS) const;
496
497  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
498                      const SmallVectorImpl<ISD::OutputArg> &Outs,
499                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
500                      SelectionDAG &DAG) const override;
501
502  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
503                                    std::vector<SDValue> &Ops,
504                                    SelectionDAG &DAG) const override;
505
506  const NVPTXTargetMachine *nvTM;
507
508  // PTX always uses 32-bit shift amounts
509  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
510    return MVT::i32;
511  }
512
513  TargetLoweringBase::LegalizeTypeAction
514  getPreferredVectorAction(MVT VT) const override;
515
516  // Get the degree of precision we want from 32-bit floating point division
517  // operations.
518  //
519  //  0 - Use ptx div.approx
520  //  1 - Use ptx.div.full (approximate, but less so than div.approx)
521  //  2 - Use IEEE-compliant div instructions, if available.
522  int getDivF32Level() const;
523
524  // Get whether we should use a precise or approximate 32-bit floating point
525  // sqrt instruction.
526  bool usePrecSqrtF32() const;
527
528  // Get whether we should use instructions that flush floating-point denormals
529  // to sign-preserving zero.
530  bool useF32FTZ(const MachineFunction &MF) const;
531
532  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
533                          int &ExtraSteps, bool &UseOneConst,
534                          bool Reciprocal) const override;
535
536  unsigned combineRepeatedFPDivisors() const override { return 2; }
537
538  bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
539  bool allowUnsafeFPMath(MachineFunction &MF) const;
540
541  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
542                                  EVT) const override {
543    return true;
544  }
545
546  bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
547
548  // The default is to transform llvm.ctlz(x, false) (where false indicates that
549  // x == 0 is not undefined behavior) into a branch that checks whether x is 0
550  // and avoids calling ctlz in that case.  We have a dedicated ctlz
551  // instruction, so we say that ctlz is cheap to speculate.
552  bool isCheapToSpeculateCtlz() const override { return true; }
553
554private:
555  const NVPTXSubtarget &STI; // cache the subtarget here
556  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
557
558  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
559  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
560  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
561
562  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
563  SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
564  SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
565
566  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
567  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
568
569  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
570  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
571  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
572
573  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
574  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
575
576  SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
577
578  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
579                          SelectionDAG &DAG) const override;
580  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
581
582  unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
583                                unsigned Idx, const DataLayout &DL) const;
584};
585} // namespace llvm
586
587#endif
588