1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "nvptx-isel"
25
26using namespace llvm;
27
28static cl::opt<bool> UseFMADInstruction(
29    "nvptx-mad-enable", cl::ZeroOrMore,
30    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
31    cl::init(false));
32
33static cl::opt<int>
34FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
35                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
36                          " 1: do it  2: do it aggressively"),
37                 cl::init(2));
38
39static cl::opt<int> UsePrecDivF32(
40    "nvptx-prec-divf32", cl::ZeroOrMore,
41    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
42             " IEEE Compliant F32 div.rnd if avaiable."),
43    cl::init(2));
44
45static cl::opt<bool>
46UsePrecSqrtF32("nvptx-prec-sqrtf32",
47          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
48          cl::init(true));
49
50/// createNVPTXISelDag - This pass converts a legalized DAG into a
51/// NVPTX-specific DAG, ready for instruction scheduling.
52FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
53                                       llvm::CodeGenOpt::Level OptLevel) {
54  return new NVPTXDAGToDAGISel(TM, OptLevel);
55}
56
57NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
58                                     CodeGenOpt::Level OptLevel)
59    : SelectionDAGISel(tm, OptLevel),
60      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
61  // Always do fma.f32 fpcontract if the target supports the instruction.
62  // Always do fma.f64 fpcontract if the target supports the instruction.
63  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
64  // support fma.f32.
65
66  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
67  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
68  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
69  doFMAF32AGG =
70      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
71  doFMAF64AGG =
72      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
73
74  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
75
76  UseF32FTZ = false;
77
78  doMulWide = (OptLevel > 0);
79
80  // Decide how to translate f32 div
81  do_DIVF32_PREC = UsePrecDivF32;
82  // Decide how to translate f32 sqrt
83  do_SQRTF32_PREC = UsePrecSqrtF32;
84  // sm less than sm_20 does not support div.rnd. Use div.full.
85  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
86    do_DIVF32_PREC = 1;
87
88}
89
90/// Select - Select instructions not customized! Used for
91/// expanded, promoted and normal instructions.
92SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
93
94  if (N->isMachineOpcode()) {
95    N->setNodeId(-1);
96    return NULL; // Already selected.
97  }
98
99  SDNode *ResNode = NULL;
100  switch (N->getOpcode()) {
101  case ISD::LOAD:
102    ResNode = SelectLoad(N);
103    break;
104  case ISD::STORE:
105    ResNode = SelectStore(N);
106    break;
107  case NVPTXISD::LoadV2:
108  case NVPTXISD::LoadV4:
109    ResNode = SelectLoadVector(N);
110    break;
111  case NVPTXISD::LDGV2:
112  case NVPTXISD::LDGV4:
113  case NVPTXISD::LDUV2:
114  case NVPTXISD::LDUV4:
115    ResNode = SelectLDGLDUVector(N);
116    break;
117  case NVPTXISD::StoreV2:
118  case NVPTXISD::StoreV4:
119    ResNode = SelectStoreVector(N);
120    break;
121  default:
122    break;
123  }
124  if (ResNode)
125    return ResNode;
126  return SelectCode(N);
127}
128
129static unsigned int getCodeAddrSpace(MemSDNode *N,
130                                     const NVPTXSubtarget &Subtarget) {
131  const Value *Src = N->getSrcValue();
132  if (!Src)
133    return NVPTX::PTXLdStInstCode::LOCAL;
134
135  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
136    switch (PT->getAddressSpace()) {
137    case llvm::ADDRESS_SPACE_LOCAL:
138      return NVPTX::PTXLdStInstCode::LOCAL;
139    case llvm::ADDRESS_SPACE_GLOBAL:
140      return NVPTX::PTXLdStInstCode::GLOBAL;
141    case llvm::ADDRESS_SPACE_SHARED:
142      return NVPTX::PTXLdStInstCode::SHARED;
143    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
144      return NVPTX::PTXLdStInstCode::CONSTANT;
145    case llvm::ADDRESS_SPACE_GENERIC:
146      return NVPTX::PTXLdStInstCode::GENERIC;
147    case llvm::ADDRESS_SPACE_PARAM:
148      return NVPTX::PTXLdStInstCode::PARAM;
149    case llvm::ADDRESS_SPACE_CONST:
150      // If the arch supports generic address space, translate it to GLOBAL
151      // for correctness.
152      // If the arch does not support generic address space, then the arch
153      // does not really support ADDRESS_SPACE_CONST, translate it to
154      // to CONSTANT for better performance.
155      if (Subtarget.hasGenericLdSt())
156        return NVPTX::PTXLdStInstCode::GLOBAL;
157      else
158        return NVPTX::PTXLdStInstCode::CONSTANT;
159    default:
160      break;
161    }
162  }
163  return NVPTX::PTXLdStInstCode::LOCAL;
164}
165
166SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
167  DebugLoc dl = N->getDebugLoc();
168  LoadSDNode *LD = cast<LoadSDNode>(N);
169  EVT LoadedVT = LD->getMemoryVT();
170  SDNode *NVPTXLD = NULL;
171
172  // do not support pre/post inc/dec
173  if (LD->isIndexed())
174    return NULL;
175
176  if (!LoadedVT.isSimple())
177    return NULL;
178
179  // Address Space Setting
180  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
181
182  // Volatile Setting
183  // - .volatile is only availalble for .global and .shared
184  bool isVolatile = LD->isVolatile();
185  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
186      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
187      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
188    isVolatile = false;
189
190  // Vector Setting
191  MVT SimpleVT = LoadedVT.getSimpleVT();
192  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
193  if (SimpleVT.isVector()) {
194    unsigned num = SimpleVT.getVectorNumElements();
195    if (num == 2)
196      vecType = NVPTX::PTXLdStInstCode::V2;
197    else if (num == 4)
198      vecType = NVPTX::PTXLdStInstCode::V4;
199    else
200      return NULL;
201  }
202
203  // Type Setting: fromType + fromTypeWidth
204  //
205  // Sign   : ISD::SEXTLOAD
206  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
207  //          type is integer
208  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
209  MVT ScalarVT = SimpleVT.getScalarType();
210  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
211  unsigned int fromType;
212  if ((LD->getExtensionType() == ISD::SEXTLOAD))
213    fromType = NVPTX::PTXLdStInstCode::Signed;
214  else if (ScalarVT.isFloatingPoint())
215    fromType = NVPTX::PTXLdStInstCode::Float;
216  else
217    fromType = NVPTX::PTXLdStInstCode::Unsigned;
218
219  // Create the machine instruction DAG
220  SDValue Chain = N->getOperand(0);
221  SDValue N1 = N->getOperand(1);
222  SDValue Addr;
223  SDValue Offset, Base;
224  unsigned Opcode;
225  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
226
227  if (SelectDirectAddr(N1, Addr)) {
228    switch (TargetVT) {
229    case MVT::i8:
230      Opcode = NVPTX::LD_i8_avar;
231      break;
232    case MVT::i16:
233      Opcode = NVPTX::LD_i16_avar;
234      break;
235    case MVT::i32:
236      Opcode = NVPTX::LD_i32_avar;
237      break;
238    case MVT::i64:
239      Opcode = NVPTX::LD_i64_avar;
240      break;
241    case MVT::f32:
242      Opcode = NVPTX::LD_f32_avar;
243      break;
244    case MVT::f64:
245      Opcode = NVPTX::LD_f64_avar;
246      break;
247    default:
248      return NULL;
249    }
250    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
251                      getI32Imm(vecType), getI32Imm(fromType),
252                      getI32Imm(fromTypeWidth), Addr, Chain };
253    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
254  } else if (Subtarget.is64Bit()
255                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
256                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
257    switch (TargetVT) {
258    case MVT::i8:
259      Opcode = NVPTX::LD_i8_asi;
260      break;
261    case MVT::i16:
262      Opcode = NVPTX::LD_i16_asi;
263      break;
264    case MVT::i32:
265      Opcode = NVPTX::LD_i32_asi;
266      break;
267    case MVT::i64:
268      Opcode = NVPTX::LD_i64_asi;
269      break;
270    case MVT::f32:
271      Opcode = NVPTX::LD_f32_asi;
272      break;
273    case MVT::f64:
274      Opcode = NVPTX::LD_f64_asi;
275      break;
276    default:
277      return NULL;
278    }
279    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
280                      getI32Imm(vecType), getI32Imm(fromType),
281                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
282    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
283  } else if (Subtarget.is64Bit()
284                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
285                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
286    if (Subtarget.is64Bit()) {
287      switch (TargetVT) {
288      case MVT::i8:
289        Opcode = NVPTX::LD_i8_ari_64;
290        break;
291      case MVT::i16:
292        Opcode = NVPTX::LD_i16_ari_64;
293        break;
294      case MVT::i32:
295        Opcode = NVPTX::LD_i32_ari_64;
296        break;
297      case MVT::i64:
298        Opcode = NVPTX::LD_i64_ari_64;
299        break;
300      case MVT::f32:
301        Opcode = NVPTX::LD_f32_ari_64;
302        break;
303      case MVT::f64:
304        Opcode = NVPTX::LD_f64_ari_64;
305        break;
306      default:
307        return NULL;
308      }
309    } else {
310      switch (TargetVT) {
311      case MVT::i8:
312        Opcode = NVPTX::LD_i8_ari;
313        break;
314      case MVT::i16:
315        Opcode = NVPTX::LD_i16_ari;
316        break;
317      case MVT::i32:
318        Opcode = NVPTX::LD_i32_ari;
319        break;
320      case MVT::i64:
321        Opcode = NVPTX::LD_i64_ari;
322        break;
323      case MVT::f32:
324        Opcode = NVPTX::LD_f32_ari;
325        break;
326      case MVT::f64:
327        Opcode = NVPTX::LD_f64_ari;
328        break;
329      default:
330        return NULL;
331      }
332    }
333    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
334                      getI32Imm(vecType), getI32Imm(fromType),
335                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
336    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
337  } else {
338    if (Subtarget.is64Bit()) {
339      switch (TargetVT) {
340      case MVT::i8:
341        Opcode = NVPTX::LD_i8_areg_64;
342        break;
343      case MVT::i16:
344        Opcode = NVPTX::LD_i16_areg_64;
345        break;
346      case MVT::i32:
347        Opcode = NVPTX::LD_i32_areg_64;
348        break;
349      case MVT::i64:
350        Opcode = NVPTX::LD_i64_areg_64;
351        break;
352      case MVT::f32:
353        Opcode = NVPTX::LD_f32_areg_64;
354        break;
355      case MVT::f64:
356        Opcode = NVPTX::LD_f64_areg_64;
357        break;
358      default:
359        return NULL;
360      }
361    } else {
362      switch (TargetVT) {
363      case MVT::i8:
364        Opcode = NVPTX::LD_i8_areg;
365        break;
366      case MVT::i16:
367        Opcode = NVPTX::LD_i16_areg;
368        break;
369      case MVT::i32:
370        Opcode = NVPTX::LD_i32_areg;
371        break;
372      case MVT::i64:
373        Opcode = NVPTX::LD_i64_areg;
374        break;
375      case MVT::f32:
376        Opcode = NVPTX::LD_f32_areg;
377        break;
378      case MVT::f64:
379        Opcode = NVPTX::LD_f64_areg;
380        break;
381      default:
382        return NULL;
383      }
384    }
385    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
386                      getI32Imm(vecType), getI32Imm(fromType),
387                      getI32Imm(fromTypeWidth), N1, Chain };
388    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
389  }
390
391  if (NVPTXLD != NULL) {
392    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
393    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
394    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
395  }
396
397  return NVPTXLD;
398}
399
400SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
401
402  SDValue Chain = N->getOperand(0);
403  SDValue Op1 = N->getOperand(1);
404  SDValue Addr, Offset, Base;
405  unsigned Opcode;
406  DebugLoc DL = N->getDebugLoc();
407  SDNode *LD;
408  MemSDNode *MemSD = cast<MemSDNode>(N);
409  EVT LoadedVT = MemSD->getMemoryVT();
410
411  if (!LoadedVT.isSimple())
412    return NULL;
413
414  // Address Space Setting
415  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
416
417  // Volatile Setting
418  // - .volatile is only availalble for .global and .shared
419  bool IsVolatile = MemSD->isVolatile();
420  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
421      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
422      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
423    IsVolatile = false;
424
425  // Vector Setting
426  MVT SimpleVT = LoadedVT.getSimpleVT();
427
428  // Type Setting: fromType + fromTypeWidth
429  //
430  // Sign   : ISD::SEXTLOAD
431  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
432  //          type is integer
433  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
434  MVT ScalarVT = SimpleVT.getScalarType();
435  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
436  unsigned int FromType;
437  // The last operand holds the original LoadSDNode::getExtensionType() value
438  unsigned ExtensionType = cast<ConstantSDNode>(
439      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
440  if (ExtensionType == ISD::SEXTLOAD)
441    FromType = NVPTX::PTXLdStInstCode::Signed;
442  else if (ScalarVT.isFloatingPoint())
443    FromType = NVPTX::PTXLdStInstCode::Float;
444  else
445    FromType = NVPTX::PTXLdStInstCode::Unsigned;
446
447  unsigned VecType;
448
449  switch (N->getOpcode()) {
450  case NVPTXISD::LoadV2:
451    VecType = NVPTX::PTXLdStInstCode::V2;
452    break;
453  case NVPTXISD::LoadV4:
454    VecType = NVPTX::PTXLdStInstCode::V4;
455    break;
456  default:
457    return NULL;
458  }
459
460  EVT EltVT = N->getValueType(0);
461
462  if (SelectDirectAddr(Op1, Addr)) {
463    switch (N->getOpcode()) {
464    default:
465      return NULL;
466    case NVPTXISD::LoadV2:
467      switch (EltVT.getSimpleVT().SimpleTy) {
468      default:
469        return NULL;
470      case MVT::i8:
471        Opcode = NVPTX::LDV_i8_v2_avar;
472        break;
473      case MVT::i16:
474        Opcode = NVPTX::LDV_i16_v2_avar;
475        break;
476      case MVT::i32:
477        Opcode = NVPTX::LDV_i32_v2_avar;
478        break;
479      case MVT::i64:
480        Opcode = NVPTX::LDV_i64_v2_avar;
481        break;
482      case MVT::f32:
483        Opcode = NVPTX::LDV_f32_v2_avar;
484        break;
485      case MVT::f64:
486        Opcode = NVPTX::LDV_f64_v2_avar;
487        break;
488      }
489      break;
490    case NVPTXISD::LoadV4:
491      switch (EltVT.getSimpleVT().SimpleTy) {
492      default:
493        return NULL;
494      case MVT::i8:
495        Opcode = NVPTX::LDV_i8_v4_avar;
496        break;
497      case MVT::i16:
498        Opcode = NVPTX::LDV_i16_v4_avar;
499        break;
500      case MVT::i32:
501        Opcode = NVPTX::LDV_i32_v4_avar;
502        break;
503      case MVT::f32:
504        Opcode = NVPTX::LDV_f32_v4_avar;
505        break;
506      }
507      break;
508    }
509
510    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
511                      getI32Imm(VecType), getI32Imm(FromType),
512                      getI32Imm(FromTypeWidth), Addr, Chain };
513    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
514  } else if (Subtarget.is64Bit()
515                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
516                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
517    switch (N->getOpcode()) {
518    default:
519      return NULL;
520    case NVPTXISD::LoadV2:
521      switch (EltVT.getSimpleVT().SimpleTy) {
522      default:
523        return NULL;
524      case MVT::i8:
525        Opcode = NVPTX::LDV_i8_v2_asi;
526        break;
527      case MVT::i16:
528        Opcode = NVPTX::LDV_i16_v2_asi;
529        break;
530      case MVT::i32:
531        Opcode = NVPTX::LDV_i32_v2_asi;
532        break;
533      case MVT::i64:
534        Opcode = NVPTX::LDV_i64_v2_asi;
535        break;
536      case MVT::f32:
537        Opcode = NVPTX::LDV_f32_v2_asi;
538        break;
539      case MVT::f64:
540        Opcode = NVPTX::LDV_f64_v2_asi;
541        break;
542      }
543      break;
544    case NVPTXISD::LoadV4:
545      switch (EltVT.getSimpleVT().SimpleTy) {
546      default:
547        return NULL;
548      case MVT::i8:
549        Opcode = NVPTX::LDV_i8_v4_asi;
550        break;
551      case MVT::i16:
552        Opcode = NVPTX::LDV_i16_v4_asi;
553        break;
554      case MVT::i32:
555        Opcode = NVPTX::LDV_i32_v4_asi;
556        break;
557      case MVT::f32:
558        Opcode = NVPTX::LDV_f32_v4_asi;
559        break;
560      }
561      break;
562    }
563
564    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
565                      getI32Imm(VecType), getI32Imm(FromType),
566                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
567    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
568  } else if (Subtarget.is64Bit()
569                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
570                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
571    if (Subtarget.is64Bit()) {
572      switch (N->getOpcode()) {
573      default:
574        return NULL;
575      case NVPTXISD::LoadV2:
576        switch (EltVT.getSimpleVT().SimpleTy) {
577        default:
578          return NULL;
579        case MVT::i8:
580          Opcode = NVPTX::LDV_i8_v2_ari_64;
581          break;
582        case MVT::i16:
583          Opcode = NVPTX::LDV_i16_v2_ari_64;
584          break;
585        case MVT::i32:
586          Opcode = NVPTX::LDV_i32_v2_ari_64;
587          break;
588        case MVT::i64:
589          Opcode = NVPTX::LDV_i64_v2_ari_64;
590          break;
591        case MVT::f32:
592          Opcode = NVPTX::LDV_f32_v2_ari_64;
593          break;
594        case MVT::f64:
595          Opcode = NVPTX::LDV_f64_v2_ari_64;
596          break;
597        }
598        break;
599      case NVPTXISD::LoadV4:
600        switch (EltVT.getSimpleVT().SimpleTy) {
601        default:
602          return NULL;
603        case MVT::i8:
604          Opcode = NVPTX::LDV_i8_v4_ari_64;
605          break;
606        case MVT::i16:
607          Opcode = NVPTX::LDV_i16_v4_ari_64;
608          break;
609        case MVT::i32:
610          Opcode = NVPTX::LDV_i32_v4_ari_64;
611          break;
612        case MVT::f32:
613          Opcode = NVPTX::LDV_f32_v4_ari_64;
614          break;
615        }
616        break;
617      }
618    } else {
619      switch (N->getOpcode()) {
620      default:
621        return NULL;
622      case NVPTXISD::LoadV2:
623        switch (EltVT.getSimpleVT().SimpleTy) {
624        default:
625          return NULL;
626        case MVT::i8:
627          Opcode = NVPTX::LDV_i8_v2_ari;
628          break;
629        case MVT::i16:
630          Opcode = NVPTX::LDV_i16_v2_ari;
631          break;
632        case MVT::i32:
633          Opcode = NVPTX::LDV_i32_v2_ari;
634          break;
635        case MVT::i64:
636          Opcode = NVPTX::LDV_i64_v2_ari;
637          break;
638        case MVT::f32:
639          Opcode = NVPTX::LDV_f32_v2_ari;
640          break;
641        case MVT::f64:
642          Opcode = NVPTX::LDV_f64_v2_ari;
643          break;
644        }
645        break;
646      case NVPTXISD::LoadV4:
647        switch (EltVT.getSimpleVT().SimpleTy) {
648        default:
649          return NULL;
650        case MVT::i8:
651          Opcode = NVPTX::LDV_i8_v4_ari;
652          break;
653        case MVT::i16:
654          Opcode = NVPTX::LDV_i16_v4_ari;
655          break;
656        case MVT::i32:
657          Opcode = NVPTX::LDV_i32_v4_ari;
658          break;
659        case MVT::f32:
660          Opcode = NVPTX::LDV_f32_v4_ari;
661          break;
662        }
663        break;
664      }
665    }
666
667    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
668                      getI32Imm(VecType), getI32Imm(FromType),
669                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
670
671    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
672  } else {
673    if (Subtarget.is64Bit()) {
674      switch (N->getOpcode()) {
675      default:
676        return NULL;
677      case NVPTXISD::LoadV2:
678        switch (EltVT.getSimpleVT().SimpleTy) {
679        default:
680          return NULL;
681        case MVT::i8:
682          Opcode = NVPTX::LDV_i8_v2_areg_64;
683          break;
684        case MVT::i16:
685          Opcode = NVPTX::LDV_i16_v2_areg_64;
686          break;
687        case MVT::i32:
688          Opcode = NVPTX::LDV_i32_v2_areg_64;
689          break;
690        case MVT::i64:
691          Opcode = NVPTX::LDV_i64_v2_areg_64;
692          break;
693        case MVT::f32:
694          Opcode = NVPTX::LDV_f32_v2_areg_64;
695          break;
696        case MVT::f64:
697          Opcode = NVPTX::LDV_f64_v2_areg_64;
698          break;
699        }
700        break;
701      case NVPTXISD::LoadV4:
702        switch (EltVT.getSimpleVT().SimpleTy) {
703        default:
704          return NULL;
705        case MVT::i8:
706          Opcode = NVPTX::LDV_i8_v4_areg_64;
707          break;
708        case MVT::i16:
709          Opcode = NVPTX::LDV_i16_v4_areg_64;
710          break;
711        case MVT::i32:
712          Opcode = NVPTX::LDV_i32_v4_areg_64;
713          break;
714        case MVT::f32:
715          Opcode = NVPTX::LDV_f32_v4_areg_64;
716          break;
717        }
718        break;
719      }
720    } else {
721      switch (N->getOpcode()) {
722      default:
723        return NULL;
724      case NVPTXISD::LoadV2:
725        switch (EltVT.getSimpleVT().SimpleTy) {
726        default:
727          return NULL;
728        case MVT::i8:
729          Opcode = NVPTX::LDV_i8_v2_areg;
730          break;
731        case MVT::i16:
732          Opcode = NVPTX::LDV_i16_v2_areg;
733          break;
734        case MVT::i32:
735          Opcode = NVPTX::LDV_i32_v2_areg;
736          break;
737        case MVT::i64:
738          Opcode = NVPTX::LDV_i64_v2_areg;
739          break;
740        case MVT::f32:
741          Opcode = NVPTX::LDV_f32_v2_areg;
742          break;
743        case MVT::f64:
744          Opcode = NVPTX::LDV_f64_v2_areg;
745          break;
746        }
747        break;
748      case NVPTXISD::LoadV4:
749        switch (EltVT.getSimpleVT().SimpleTy) {
750        default:
751          return NULL;
752        case MVT::i8:
753          Opcode = NVPTX::LDV_i8_v4_areg;
754          break;
755        case MVT::i16:
756          Opcode = NVPTX::LDV_i16_v4_areg;
757          break;
758        case MVT::i32:
759          Opcode = NVPTX::LDV_i32_v4_areg;
760          break;
761        case MVT::f32:
762          Opcode = NVPTX::LDV_f32_v4_areg;
763          break;
764        }
765        break;
766      }
767    }
768
769    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
770                      getI32Imm(VecType), getI32Imm(FromType),
771                      getI32Imm(FromTypeWidth), Op1, Chain };
772    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
773  }
774
775  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
776  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
777  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
778
779  return LD;
780}
781
782SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
783
784  SDValue Chain = N->getOperand(0);
785  SDValue Op1 = N->getOperand(1);
786  unsigned Opcode;
787  DebugLoc DL = N->getDebugLoc();
788  SDNode *LD;
789
790  EVT RetVT = N->getValueType(0);
791
792  // Select opcode
793  if (Subtarget.is64Bit()) {
794    switch (N->getOpcode()) {
795    default:
796      return NULL;
797    case NVPTXISD::LDGV2:
798      switch (RetVT.getSimpleVT().SimpleTy) {
799      default:
800        return NULL;
801      case MVT::i8:
802        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
803        break;
804      case MVT::i16:
805        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
806        break;
807      case MVT::i32:
808        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
809        break;
810      case MVT::i64:
811        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
812        break;
813      case MVT::f32:
814        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
815        break;
816      case MVT::f64:
817        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
818        break;
819      }
820      break;
821    case NVPTXISD::LDGV4:
822      switch (RetVT.getSimpleVT().SimpleTy) {
823      default:
824        return NULL;
825      case MVT::i8:
826        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
827        break;
828      case MVT::i16:
829        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
830        break;
831      case MVT::i32:
832        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
833        break;
834      case MVT::f32:
835        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
836        break;
837      }
838      break;
839    case NVPTXISD::LDUV2:
840      switch (RetVT.getSimpleVT().SimpleTy) {
841      default:
842        return NULL;
843      case MVT::i8:
844        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
845        break;
846      case MVT::i16:
847        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
848        break;
849      case MVT::i32:
850        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
851        break;
852      case MVT::i64:
853        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
854        break;
855      case MVT::f32:
856        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
857        break;
858      case MVT::f64:
859        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
860        break;
861      }
862      break;
863    case NVPTXISD::LDUV4:
864      switch (RetVT.getSimpleVT().SimpleTy) {
865      default:
866        return NULL;
867      case MVT::i8:
868        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
869        break;
870      case MVT::i16:
871        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
872        break;
873      case MVT::i32:
874        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
875        break;
876      case MVT::f32:
877        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
878        break;
879      }
880      break;
881    }
882  } else {
883    switch (N->getOpcode()) {
884    default:
885      return NULL;
886    case NVPTXISD::LDGV2:
887      switch (RetVT.getSimpleVT().SimpleTy) {
888      default:
889        return NULL;
890      case MVT::i8:
891        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
892        break;
893      case MVT::i16:
894        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
895        break;
896      case MVT::i32:
897        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
898        break;
899      case MVT::i64:
900        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
901        break;
902      case MVT::f32:
903        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
904        break;
905      case MVT::f64:
906        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
907        break;
908      }
909      break;
910    case NVPTXISD::LDGV4:
911      switch (RetVT.getSimpleVT().SimpleTy) {
912      default:
913        return NULL;
914      case MVT::i8:
915        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
916        break;
917      case MVT::i16:
918        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
919        break;
920      case MVT::i32:
921        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
922        break;
923      case MVT::f32:
924        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
925        break;
926      }
927      break;
928    case NVPTXISD::LDUV2:
929      switch (RetVT.getSimpleVT().SimpleTy) {
930      default:
931        return NULL;
932      case MVT::i8:
933        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
934        break;
935      case MVT::i16:
936        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
937        break;
938      case MVT::i32:
939        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
940        break;
941      case MVT::i64:
942        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
943        break;
944      case MVT::f32:
945        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
946        break;
947      case MVT::f64:
948        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
949        break;
950      }
951      break;
952    case NVPTXISD::LDUV4:
953      switch (RetVT.getSimpleVT().SimpleTy) {
954      default:
955        return NULL;
956      case MVT::i8:
957        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
958        break;
959      case MVT::i16:
960        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
961        break;
962      case MVT::i32:
963        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
964        break;
965      case MVT::f32:
966        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
967        break;
968      }
969      break;
970    }
971  }
972
973  SDValue Ops[] = { Op1, Chain };
974  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
975
976  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
977  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
978  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
979
980  return LD;
981}
982
983SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
984  DebugLoc dl = N->getDebugLoc();
985  StoreSDNode *ST = cast<StoreSDNode>(N);
986  EVT StoreVT = ST->getMemoryVT();
987  SDNode *NVPTXST = NULL;
988
989  // do not support pre/post inc/dec
990  if (ST->isIndexed())
991    return NULL;
992
993  if (!StoreVT.isSimple())
994    return NULL;
995
996  // Address Space Setting
997  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
998
999  // Volatile Setting
1000  // - .volatile is only availalble for .global and .shared
1001  bool isVolatile = ST->isVolatile();
1002  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1003      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1004      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1005    isVolatile = false;
1006
1007  // Vector Setting
1008  MVT SimpleVT = StoreVT.getSimpleVT();
1009  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1010  if (SimpleVT.isVector()) {
1011    unsigned num = SimpleVT.getVectorNumElements();
1012    if (num == 2)
1013      vecType = NVPTX::PTXLdStInstCode::V2;
1014    else if (num == 4)
1015      vecType = NVPTX::PTXLdStInstCode::V4;
1016    else
1017      return NULL;
1018  }
1019
1020  // Type Setting: toType + toTypeWidth
1021  // - for integer type, always use 'u'
1022  //
1023  MVT ScalarVT = SimpleVT.getScalarType();
1024  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1025  unsigned int toType;
1026  if (ScalarVT.isFloatingPoint())
1027    toType = NVPTX::PTXLdStInstCode::Float;
1028  else
1029    toType = NVPTX::PTXLdStInstCode::Unsigned;
1030
1031  // Create the machine instruction DAG
1032  SDValue Chain = N->getOperand(0);
1033  SDValue N1 = N->getOperand(1);
1034  SDValue N2 = N->getOperand(2);
1035  SDValue Addr;
1036  SDValue Offset, Base;
1037  unsigned Opcode;
1038  MVT::SimpleValueType SourceVT =
1039      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
1040
1041  if (SelectDirectAddr(N2, Addr)) {
1042    switch (SourceVT) {
1043    case MVT::i8:
1044      Opcode = NVPTX::ST_i8_avar;
1045      break;
1046    case MVT::i16:
1047      Opcode = NVPTX::ST_i16_avar;
1048      break;
1049    case MVT::i32:
1050      Opcode = NVPTX::ST_i32_avar;
1051      break;
1052    case MVT::i64:
1053      Opcode = NVPTX::ST_i64_avar;
1054      break;
1055    case MVT::f32:
1056      Opcode = NVPTX::ST_f32_avar;
1057      break;
1058    case MVT::f64:
1059      Opcode = NVPTX::ST_f64_avar;
1060      break;
1061    default:
1062      return NULL;
1063    }
1064    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1065                      getI32Imm(vecType), getI32Imm(toType),
1066                      getI32Imm(toTypeWidth), Addr, Chain };
1067    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1068  } else if (Subtarget.is64Bit()
1069                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1070                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1071    switch (SourceVT) {
1072    case MVT::i8:
1073      Opcode = NVPTX::ST_i8_asi;
1074      break;
1075    case MVT::i16:
1076      Opcode = NVPTX::ST_i16_asi;
1077      break;
1078    case MVT::i32:
1079      Opcode = NVPTX::ST_i32_asi;
1080      break;
1081    case MVT::i64:
1082      Opcode = NVPTX::ST_i64_asi;
1083      break;
1084    case MVT::f32:
1085      Opcode = NVPTX::ST_f32_asi;
1086      break;
1087    case MVT::f64:
1088      Opcode = NVPTX::ST_f64_asi;
1089      break;
1090    default:
1091      return NULL;
1092    }
1093    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1094                      getI32Imm(vecType), getI32Imm(toType),
1095                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1096    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1097  } else if (Subtarget.is64Bit()
1098                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1099                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1100    if (Subtarget.is64Bit()) {
1101      switch (SourceVT) {
1102      case MVT::i8:
1103        Opcode = NVPTX::ST_i8_ari_64;
1104        break;
1105      case MVT::i16:
1106        Opcode = NVPTX::ST_i16_ari_64;
1107        break;
1108      case MVT::i32:
1109        Opcode = NVPTX::ST_i32_ari_64;
1110        break;
1111      case MVT::i64:
1112        Opcode = NVPTX::ST_i64_ari_64;
1113        break;
1114      case MVT::f32:
1115        Opcode = NVPTX::ST_f32_ari_64;
1116        break;
1117      case MVT::f64:
1118        Opcode = NVPTX::ST_f64_ari_64;
1119        break;
1120      default:
1121        return NULL;
1122      }
1123    } else {
1124      switch (SourceVT) {
1125      case MVT::i8:
1126        Opcode = NVPTX::ST_i8_ari;
1127        break;
1128      case MVT::i16:
1129        Opcode = NVPTX::ST_i16_ari;
1130        break;
1131      case MVT::i32:
1132        Opcode = NVPTX::ST_i32_ari;
1133        break;
1134      case MVT::i64:
1135        Opcode = NVPTX::ST_i64_ari;
1136        break;
1137      case MVT::f32:
1138        Opcode = NVPTX::ST_f32_ari;
1139        break;
1140      case MVT::f64:
1141        Opcode = NVPTX::ST_f64_ari;
1142        break;
1143      default:
1144        return NULL;
1145      }
1146    }
1147    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1148                      getI32Imm(vecType), getI32Imm(toType),
1149                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1150    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1151  } else {
1152    if (Subtarget.is64Bit()) {
1153      switch (SourceVT) {
1154      case MVT::i8:
1155        Opcode = NVPTX::ST_i8_areg_64;
1156        break;
1157      case MVT::i16:
1158        Opcode = NVPTX::ST_i16_areg_64;
1159        break;
1160      case MVT::i32:
1161        Opcode = NVPTX::ST_i32_areg_64;
1162        break;
1163      case MVT::i64:
1164        Opcode = NVPTX::ST_i64_areg_64;
1165        break;
1166      case MVT::f32:
1167        Opcode = NVPTX::ST_f32_areg_64;
1168        break;
1169      case MVT::f64:
1170        Opcode = NVPTX::ST_f64_areg_64;
1171        break;
1172      default:
1173        return NULL;
1174      }
1175    } else {
1176      switch (SourceVT) {
1177      case MVT::i8:
1178        Opcode = NVPTX::ST_i8_areg;
1179        break;
1180      case MVT::i16:
1181        Opcode = NVPTX::ST_i16_areg;
1182        break;
1183      case MVT::i32:
1184        Opcode = NVPTX::ST_i32_areg;
1185        break;
1186      case MVT::i64:
1187        Opcode = NVPTX::ST_i64_areg;
1188        break;
1189      case MVT::f32:
1190        Opcode = NVPTX::ST_f32_areg;
1191        break;
1192      case MVT::f64:
1193        Opcode = NVPTX::ST_f64_areg;
1194        break;
1195      default:
1196        return NULL;
1197      }
1198    }
1199    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1200                      getI32Imm(vecType), getI32Imm(toType),
1201                      getI32Imm(toTypeWidth), N2, Chain };
1202    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1203  }
1204
1205  if (NVPTXST != NULL) {
1206    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1207    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1208    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1209  }
1210
1211  return NVPTXST;
1212}
1213
1214SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1215  SDValue Chain = N->getOperand(0);
1216  SDValue Op1 = N->getOperand(1);
1217  SDValue Addr, Offset, Base;
1218  unsigned Opcode;
1219  DebugLoc DL = N->getDebugLoc();
1220  SDNode *ST;
1221  EVT EltVT = Op1.getValueType();
1222  MemSDNode *MemSD = cast<MemSDNode>(N);
1223  EVT StoreVT = MemSD->getMemoryVT();
1224
1225  // Address Space Setting
1226  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1227
1228  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1229    report_fatal_error("Cannot store to pointer that points to constant "
1230                       "memory space");
1231  }
1232
1233  // Volatile Setting
1234  // - .volatile is only availalble for .global and .shared
1235  bool IsVolatile = MemSD->isVolatile();
1236  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1237      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1238      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1239    IsVolatile = false;
1240
1241  // Type Setting: toType + toTypeWidth
1242  // - for integer type, always use 'u'
1243  assert(StoreVT.isSimple() && "Store value is not simple");
1244  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1245  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1246  unsigned ToType;
1247  if (ScalarVT.isFloatingPoint())
1248    ToType = NVPTX::PTXLdStInstCode::Float;
1249  else
1250    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1251
1252  SmallVector<SDValue, 12> StOps;
1253  SDValue N2;
1254  unsigned VecType;
1255
1256  switch (N->getOpcode()) {
1257  case NVPTXISD::StoreV2:
1258    VecType = NVPTX::PTXLdStInstCode::V2;
1259    StOps.push_back(N->getOperand(1));
1260    StOps.push_back(N->getOperand(2));
1261    N2 = N->getOperand(3);
1262    break;
1263  case NVPTXISD::StoreV4:
1264    VecType = NVPTX::PTXLdStInstCode::V4;
1265    StOps.push_back(N->getOperand(1));
1266    StOps.push_back(N->getOperand(2));
1267    StOps.push_back(N->getOperand(3));
1268    StOps.push_back(N->getOperand(4));
1269    N2 = N->getOperand(5);
1270    break;
1271  default:
1272    return NULL;
1273  }
1274
1275  StOps.push_back(getI32Imm(IsVolatile));
1276  StOps.push_back(getI32Imm(CodeAddrSpace));
1277  StOps.push_back(getI32Imm(VecType));
1278  StOps.push_back(getI32Imm(ToType));
1279  StOps.push_back(getI32Imm(ToTypeWidth));
1280
1281  if (SelectDirectAddr(N2, Addr)) {
1282    switch (N->getOpcode()) {
1283    default:
1284      return NULL;
1285    case NVPTXISD::StoreV2:
1286      switch (EltVT.getSimpleVT().SimpleTy) {
1287      default:
1288        return NULL;
1289      case MVT::i8:
1290        Opcode = NVPTX::STV_i8_v2_avar;
1291        break;
1292      case MVT::i16:
1293        Opcode = NVPTX::STV_i16_v2_avar;
1294        break;
1295      case MVT::i32:
1296        Opcode = NVPTX::STV_i32_v2_avar;
1297        break;
1298      case MVT::i64:
1299        Opcode = NVPTX::STV_i64_v2_avar;
1300        break;
1301      case MVT::f32:
1302        Opcode = NVPTX::STV_f32_v2_avar;
1303        break;
1304      case MVT::f64:
1305        Opcode = NVPTX::STV_f64_v2_avar;
1306        break;
1307      }
1308      break;
1309    case NVPTXISD::StoreV4:
1310      switch (EltVT.getSimpleVT().SimpleTy) {
1311      default:
1312        return NULL;
1313      case MVT::i8:
1314        Opcode = NVPTX::STV_i8_v4_avar;
1315        break;
1316      case MVT::i16:
1317        Opcode = NVPTX::STV_i16_v4_avar;
1318        break;
1319      case MVT::i32:
1320        Opcode = NVPTX::STV_i32_v4_avar;
1321        break;
1322      case MVT::f32:
1323        Opcode = NVPTX::STV_f32_v4_avar;
1324        break;
1325      }
1326      break;
1327    }
1328    StOps.push_back(Addr);
1329  } else if (Subtarget.is64Bit()
1330                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1331                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1332    switch (N->getOpcode()) {
1333    default:
1334      return NULL;
1335    case NVPTXISD::StoreV2:
1336      switch (EltVT.getSimpleVT().SimpleTy) {
1337      default:
1338        return NULL;
1339      case MVT::i8:
1340        Opcode = NVPTX::STV_i8_v2_asi;
1341        break;
1342      case MVT::i16:
1343        Opcode = NVPTX::STV_i16_v2_asi;
1344        break;
1345      case MVT::i32:
1346        Opcode = NVPTX::STV_i32_v2_asi;
1347        break;
1348      case MVT::i64:
1349        Opcode = NVPTX::STV_i64_v2_asi;
1350        break;
1351      case MVT::f32:
1352        Opcode = NVPTX::STV_f32_v2_asi;
1353        break;
1354      case MVT::f64:
1355        Opcode = NVPTX::STV_f64_v2_asi;
1356        break;
1357      }
1358      break;
1359    case NVPTXISD::StoreV4:
1360      switch (EltVT.getSimpleVT().SimpleTy) {
1361      default:
1362        return NULL;
1363      case MVT::i8:
1364        Opcode = NVPTX::STV_i8_v4_asi;
1365        break;
1366      case MVT::i16:
1367        Opcode = NVPTX::STV_i16_v4_asi;
1368        break;
1369      case MVT::i32:
1370        Opcode = NVPTX::STV_i32_v4_asi;
1371        break;
1372      case MVT::f32:
1373        Opcode = NVPTX::STV_f32_v4_asi;
1374        break;
1375      }
1376      break;
1377    }
1378    StOps.push_back(Base);
1379    StOps.push_back(Offset);
1380  } else if (Subtarget.is64Bit()
1381                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1382                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1383    if (Subtarget.is64Bit()) {
1384      switch (N->getOpcode()) {
1385      default:
1386        return NULL;
1387      case NVPTXISD::StoreV2:
1388        switch (EltVT.getSimpleVT().SimpleTy) {
1389        default:
1390          return NULL;
1391        case MVT::i8:
1392          Opcode = NVPTX::STV_i8_v2_ari_64;
1393          break;
1394        case MVT::i16:
1395          Opcode = NVPTX::STV_i16_v2_ari_64;
1396          break;
1397        case MVT::i32:
1398          Opcode = NVPTX::STV_i32_v2_ari_64;
1399          break;
1400        case MVT::i64:
1401          Opcode = NVPTX::STV_i64_v2_ari_64;
1402          break;
1403        case MVT::f32:
1404          Opcode = NVPTX::STV_f32_v2_ari_64;
1405          break;
1406        case MVT::f64:
1407          Opcode = NVPTX::STV_f64_v2_ari_64;
1408          break;
1409        }
1410        break;
1411      case NVPTXISD::StoreV4:
1412        switch (EltVT.getSimpleVT().SimpleTy) {
1413        default:
1414          return NULL;
1415        case MVT::i8:
1416          Opcode = NVPTX::STV_i8_v4_ari_64;
1417          break;
1418        case MVT::i16:
1419          Opcode = NVPTX::STV_i16_v4_ari_64;
1420          break;
1421        case MVT::i32:
1422          Opcode = NVPTX::STV_i32_v4_ari_64;
1423          break;
1424        case MVT::f32:
1425          Opcode = NVPTX::STV_f32_v4_ari_64;
1426          break;
1427        }
1428        break;
1429      }
1430    } else {
1431      switch (N->getOpcode()) {
1432      default:
1433        return NULL;
1434      case NVPTXISD::StoreV2:
1435        switch (EltVT.getSimpleVT().SimpleTy) {
1436        default:
1437          return NULL;
1438        case MVT::i8:
1439          Opcode = NVPTX::STV_i8_v2_ari;
1440          break;
1441        case MVT::i16:
1442          Opcode = NVPTX::STV_i16_v2_ari;
1443          break;
1444        case MVT::i32:
1445          Opcode = NVPTX::STV_i32_v2_ari;
1446          break;
1447        case MVT::i64:
1448          Opcode = NVPTX::STV_i64_v2_ari;
1449          break;
1450        case MVT::f32:
1451          Opcode = NVPTX::STV_f32_v2_ari;
1452          break;
1453        case MVT::f64:
1454          Opcode = NVPTX::STV_f64_v2_ari;
1455          break;
1456        }
1457        break;
1458      case NVPTXISD::StoreV4:
1459        switch (EltVT.getSimpleVT().SimpleTy) {
1460        default:
1461          return NULL;
1462        case MVT::i8:
1463          Opcode = NVPTX::STV_i8_v4_ari;
1464          break;
1465        case MVT::i16:
1466          Opcode = NVPTX::STV_i16_v4_ari;
1467          break;
1468        case MVT::i32:
1469          Opcode = NVPTX::STV_i32_v4_ari;
1470          break;
1471        case MVT::f32:
1472          Opcode = NVPTX::STV_f32_v4_ari;
1473          break;
1474        }
1475        break;
1476      }
1477    }
1478    StOps.push_back(Base);
1479    StOps.push_back(Offset);
1480  } else {
1481    if (Subtarget.is64Bit()) {
1482      switch (N->getOpcode()) {
1483      default:
1484        return NULL;
1485      case NVPTXISD::StoreV2:
1486        switch (EltVT.getSimpleVT().SimpleTy) {
1487        default:
1488          return NULL;
1489        case MVT::i8:
1490          Opcode = NVPTX::STV_i8_v2_areg_64;
1491          break;
1492        case MVT::i16:
1493          Opcode = NVPTX::STV_i16_v2_areg_64;
1494          break;
1495        case MVT::i32:
1496          Opcode = NVPTX::STV_i32_v2_areg_64;
1497          break;
1498        case MVT::i64:
1499          Opcode = NVPTX::STV_i64_v2_areg_64;
1500          break;
1501        case MVT::f32:
1502          Opcode = NVPTX::STV_f32_v2_areg_64;
1503          break;
1504        case MVT::f64:
1505          Opcode = NVPTX::STV_f64_v2_areg_64;
1506          break;
1507        }
1508        break;
1509      case NVPTXISD::StoreV4:
1510        switch (EltVT.getSimpleVT().SimpleTy) {
1511        default:
1512          return NULL;
1513        case MVT::i8:
1514          Opcode = NVPTX::STV_i8_v4_areg_64;
1515          break;
1516        case MVT::i16:
1517          Opcode = NVPTX::STV_i16_v4_areg_64;
1518          break;
1519        case MVT::i32:
1520          Opcode = NVPTX::STV_i32_v4_areg_64;
1521          break;
1522        case MVT::f32:
1523          Opcode = NVPTX::STV_f32_v4_areg_64;
1524          break;
1525        }
1526        break;
1527      }
1528    } else {
1529      switch (N->getOpcode()) {
1530      default:
1531        return NULL;
1532      case NVPTXISD::StoreV2:
1533        switch (EltVT.getSimpleVT().SimpleTy) {
1534        default:
1535          return NULL;
1536        case MVT::i8:
1537          Opcode = NVPTX::STV_i8_v2_areg;
1538          break;
1539        case MVT::i16:
1540          Opcode = NVPTX::STV_i16_v2_areg;
1541          break;
1542        case MVT::i32:
1543          Opcode = NVPTX::STV_i32_v2_areg;
1544          break;
1545        case MVT::i64:
1546          Opcode = NVPTX::STV_i64_v2_areg;
1547          break;
1548        case MVT::f32:
1549          Opcode = NVPTX::STV_f32_v2_areg;
1550          break;
1551        case MVT::f64:
1552          Opcode = NVPTX::STV_f64_v2_areg;
1553          break;
1554        }
1555        break;
1556      case NVPTXISD::StoreV4:
1557        switch (EltVT.getSimpleVT().SimpleTy) {
1558        default:
1559          return NULL;
1560        case MVT::i8:
1561          Opcode = NVPTX::STV_i8_v4_areg;
1562          break;
1563        case MVT::i16:
1564          Opcode = NVPTX::STV_i16_v4_areg;
1565          break;
1566        case MVT::i32:
1567          Opcode = NVPTX::STV_i32_v4_areg;
1568          break;
1569        case MVT::f32:
1570          Opcode = NVPTX::STV_f32_v4_areg;
1571          break;
1572        }
1573        break;
1574      }
1575    }
1576    StOps.push_back(N2);
1577  }
1578
1579  StOps.push_back(Chain);
1580
1581  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1582
1583  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1584  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1585  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1586
1587  return ST;
1588}
1589
1590// SelectDirectAddr - Match a direct address for DAG.
1591// A direct address could be a globaladdress or externalsymbol.
1592bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
1593  // Return true if TGA or ES.
1594  if (N.getOpcode() == ISD::TargetGlobalAddress ||
1595      N.getOpcode() == ISD::TargetExternalSymbol) {
1596    Address = N;
1597    return true;
1598  }
1599  if (N.getOpcode() == NVPTXISD::Wrapper) {
1600    Address = N.getOperand(0);
1601    return true;
1602  }
1603  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
1604    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
1605    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
1606      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
1607        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
1608  }
1609  return false;
1610}
1611
1612// symbol+offset
1613bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
1614    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1615  if (Addr.getOpcode() == ISD::ADD) {
1616    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1617      SDValue base = Addr.getOperand(0);
1618      if (SelectDirectAddr(base, Base)) {
1619        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1620        return true;
1621      }
1622    }
1623  }
1624  return false;
1625}
1626
1627// symbol+offset
1628bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
1629                                     SDValue &Base, SDValue &Offset) {
1630  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
1631}
1632
1633// symbol+offset
1634bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
1635                                       SDValue &Base, SDValue &Offset) {
1636  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
1637}
1638
1639// register+offset
1640bool NVPTXDAGToDAGISel::SelectADDRri_imp(
1641    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1642  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1643    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1644    Offset = CurDAG->getTargetConstant(0, mvt);
1645    return true;
1646  }
1647  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
1648      Addr.getOpcode() == ISD::TargetGlobalAddress)
1649    return false; // direct calls.
1650
1651  if (Addr.getOpcode() == ISD::ADD) {
1652    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
1653      return false;
1654    }
1655    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1656      if (FrameIndexSDNode *FIN =
1657              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
1658        // Constant offset from frame ref.
1659        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1660      else
1661        Base = Addr.getOperand(0);
1662      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1663      return true;
1664    }
1665  }
1666  return false;
1667}
1668
1669// register+offset
1670bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
1671                                     SDValue &Base, SDValue &Offset) {
1672  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
1673}
1674
1675// register+offset
1676bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
1677                                       SDValue &Base, SDValue &Offset) {
1678  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
1679}
1680
1681bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
1682                                                 unsigned int spN) const {
1683  const Value *Src = NULL;
1684  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
1685  // the classof() for MemSDNode does not include MemIntrinsicSDNode
1686  // (See SelectionDAGNodes.h). So we need to check for both.
1687  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
1688    Src = mN->getSrcValue();
1689  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
1690    Src = mN->getSrcValue();
1691  }
1692  if (!Src)
1693    return false;
1694  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
1695    return (PT->getAddressSpace() == spN);
1696  return false;
1697}
1698
1699/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
1700/// inline asm expressions.
1701bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
1702    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
1703  SDValue Op0, Op1;
1704  switch (ConstraintCode) {
1705  default:
1706    return true;
1707  case 'm': // memory
1708    if (SelectDirectAddr(Op, Op0)) {
1709      OutOps.push_back(Op0);
1710      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
1711      return false;
1712    }
1713    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
1714      OutOps.push_back(Op0);
1715      OutOps.push_back(Op1);
1716      return false;
1717    }
1718    break;
1719  }
1720  return true;
1721}
1722
1723// Return true if N is a undef or a constant.
1724// If N was undef, return a (i8imm 0) in Retval
1725// If N was imm, convert it to i8imm and return in Retval
1726// Note: The convert to i8imm is required, otherwise the
1727// pattern matcher inserts a bunch of IMOVi8rr to convert
1728// the imm to i8imm, and this causes instruction selection
1729// to fail.
1730bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
1731  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
1732    return false;
1733
1734  if (N.getOpcode() == ISD::UNDEF)
1735    Retval = CurDAG->getTargetConstant(0, MVT::i8);
1736  else {
1737    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
1738    unsigned retval = cn->getZExtValue();
1739    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
1740  }
1741  return true;
1742}
1743