// AMDGPUISelLowering.cpp revision 263508
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This is the parent TargetLowering class for hardware code gen
12/// targets.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
17#include "AMDGPU.h"
18#include "AMDGPUFrameLowering.h"
19#include "AMDGPURegisterInfo.h"
20#include "AMDGPUSubtarget.h"
21#include "AMDILIntrinsicInfo.h"
22#include "R600MachineFunctionInfo.h"
23#include "SIMachineFunctionInfo.h"
24#include "llvm/CodeGen/CallingConvLower.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
28#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29#include "llvm/IR/DataLayout.h"
30
31using namespace llvm;
32static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
33                      CCValAssign::LocInfo LocInfo,
34                      ISD::ArgFlagsTy ArgFlags, CCState &State) {
35  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
36    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
37
38  return true;
39}
40
41#include "AMDGPUGenCallingConv.inc"
42
/// Construct the common AMDGPU lowering state: install the AMDIL-inherited
/// lowering first, then declare which generic DAG operations are Legal,
/// Custom or Expand for the whole target family (R600 and SI subclasses
/// refine these further).
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL (runs before the
  // target-wide actions below are applied).
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions.  These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  setOperationAction(ISD::FPOW,   MVT::f32, Legal);
  setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
  setOperationAction(ISD::FABS,   MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  setOperationAction(ISD::FROUND, MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Custom lowering of vector stores is required for local address space
  // stores.
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  // XXX: Native v2i32 local address space stores are possible, but not
  // currently implemented.
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be change to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  // Mirror the store promotions above for loads: fp loads become integer
  // loads of the same width.
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  // No native extending vector loads; scalarize/expand them all.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  // Integer vector types get all common ALU/conversion ops expanded
  // (scalarized) by default.
  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x  = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    // Expand the following operations for the current type by default.
    setOperationAction(ISD::ADD,  VT, Expand);
    setOperationAction(ISD::AND,  VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL,  VT, Expand);
    setOperationAction(ISD::OR,   VT, Expand);
    setOperationAction(ISD::SHL,  VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL,  VT, Expand);
    setOperationAction(ISD::SRA,  VT, Expand);
    setOperationAction(ISD::SUB,  VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR,  VT, Expand);
  }

  // Likewise for the common floating-point vector ops.
  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}
192
193//===----------------------------------------------------------------------===//
194// Target Information
195//===----------------------------------------------------------------------===//
196
// Type used for vector-lane indices (EXTRACT/INSERT_VECTOR_ELT): this target
// always uses 32-bit integers for them.
MVT AMDGPUTargetLowering::getVectorIdxTy() const {
  return MVT::i32;
}
200
201bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
202                                                   EVT CastTy) const {
203  if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
204    return true;
205
206  unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
207  unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
208
209  return ((LScalarSize <= CastScalarSize) ||
210          (CastScalarSize >= 32) ||
211          (LScalarSize < 32));
212}
213
214//===---------------------------------------------------------------------===//
215// Target Properties
216//===---------------------------------------------------------------------===//
217
// fabs is reported free only for f32 -- presumably modeled as an operand
// modifier on this hardware (TODO confirm against the ISA docs).
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return VT == MVT::f32;
}
222
// fneg is reported free only for f32, mirroring isFAbsFree above.
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return VT == MVT::f32;
}
227
228//===---------------------------------------------------------------------===//
229// TargetLowering Callbacks
230//===---------------------------------------------------------------------===//
231
/// Assign locations to the incoming formal arguments \p Ins using the
/// TableGen-generated CC_AMDGPU calling-convention function (see
/// AMDGPUGenCallingConv.inc, included above).
void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
                             const SmallVectorImpl<ISD::InputArg> &Ins) const {

  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
237
/// Lower a return: Outs/OutVals are ignored and only a RET_FLAG node
/// threading the chain is emitted -- presumably because kernel entry points
/// return nothing through registers at this level (verify against callers).
SDValue AMDGPUTargetLowering::LowerReturn(
                                     SDValue Chain,
                                     CallingConv::ID CallConv,
                                     bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
247
248//===---------------------------------------------------------------------===//
249// Target specific lowering
250//===---------------------------------------------------------------------===//
251
252SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
253    const {
254  switch (Op.getOpcode()) {
255  default:
256    Op.getNode()->dump();
257    assert(0 && "Custom lowering code for this"
258        "instruction is not implemented yet!");
259    break;
260  // AMDIL DAG lowering
261  case ISD::SDIV: return LowerSDIV(Op, DAG);
262  case ISD::SREM: return LowerSREM(Op, DAG);
263  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
264  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
265  // AMDGPU DAG lowering
266  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
267  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
268  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
269  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
270  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
271  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
272  }
273  return Op;
274}
275
/// Lower a global in the local (LDS) address space to a constant byte
/// offset, assigning the variable a slot at the current end of the machine
/// function's LDS allocation on first use.
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
                                                 SDValue Op,
                                                 SelectionDAG &DAG) const {

  const DataLayout *TD = getTargetMachine().getDataLayout();
  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);

  // Only LDS globals are expected here.
  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
  // XXX: What does the value of G->getOffset() mean?
  assert(G->getOffset() == 0 &&
         "Do not know what to do with an non-zero offset");

  const GlobalValue *GV = G->getGlobal();

  unsigned Offset;
  if (MFI->LocalMemoryObjects.count(GV) == 0) {
    // First reference to this global: carve a new slot off the end of the
    // function's LDS block and remember it for later references.
    uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
    Offset = MFI->LDSSize;
    MFI->LocalMemoryObjects[GV] = Offset;
    // XXX: Account for alignment?
    MFI->LDSSize += Size;
  } else {
    Offset = MFI->LocalMemoryObjects[GV];
  }

  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
}
303
304void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
305                                         SmallVectorImpl<SDValue> &Args,
306                                         unsigned Start,
307                                         unsigned Count) const {
308  EVT VT = Op.getValueType();
309  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
310    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
311                               VT.getVectorElementType(),
312                               Op, DAG.getConstant(i, MVT::i32)));
313  }
314}
315
316SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
317                                                  SelectionDAG &DAG) const {
318  SmallVector<SDValue, 8> Args;
319  SDValue A = Op.getOperand(0);
320  SDValue B = Op.getOperand(1);
321
322  ExtractVectorElements(A, DAG, Args, 0,
323                        A.getValueType().getVectorNumElements());
324  ExtractVectorElements(B, DAG, Args, 0,
325                        B.getValueType().getVectorNumElements());
326
327  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
328                     &Args[0], Args.size());
329}
330
331SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
332                                                     SelectionDAG &DAG) const {
333
334  SmallVector<SDValue, 8> Args;
335  EVT VT = Op.getValueType();
336  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
337  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
338                        VT.getVectorNumElements());
339
340  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
341                     &Args[0], Args.size());
342}
343
344SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
345                                              SelectionDAG &DAG) const {
346
347  MachineFunction &MF = DAG.getMachineFunction();
348  const AMDGPUFrameLowering *TFL =
349   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
350
351  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
352  assert(FIN);
353
354  unsigned FrameIndex = FIN->getIndex();
355  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
356  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
357                         Op.getValueType());
358}
359
/// Expand target intrinsics that map directly onto ISD or AMDGPUISD nodes;
/// anything unrecognized is returned unchanged.
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    SelectionDAG &DAG) const {
  // Operand 0 carries the intrinsic ID; operands 1+ are the call arguments.
  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  switch (IntrinsicID) {
    default: return Op;
    case AMDGPUIntrinsic::AMDIL_abs:
      return LowerIntrinsicIABS(Op, DAG);
    case AMDGPUIntrinsic::AMDIL_exp:
      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
    case AMDGPUIntrinsic::AMDGPU_lrp:
      return LowerIntrinsicLRP(Op, DAG);
    case AMDGPUIntrinsic::AMDIL_fraction:
      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    case AMDGPUIntrinsic::AMDIL_max:
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_imax:
      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_umax:
      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDIL_min:
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_imin:
      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_umin:
      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDIL_round_nearest:
      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
  }
}
398
399///IABS(a) = SMAX(sub(0, a), a)
400SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
401    SelectionDAG &DAG) const {
402
403  SDLoc DL(Op);
404  EVT VT = Op.getValueType();
405  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
406                                              Op.getOperand(1));
407
408  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
409}
410
411/// Linear Interpolation
412/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
413SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
414    SelectionDAG &DAG) const {
415  SDLoc DL(Op);
416  EVT VT = Op.getValueType();
417  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
418                                DAG.getConstantFP(1.0f, MVT::f32),
419                                Op.getOperand(1));
420  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
421                                                    Op.getOperand(3));
422  return DAG.getNode(ISD::FADD, DL, VT,
423      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
424      OneSubAC);
425}
426
427/// \brief Generate Min/Max node
428SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
429    SelectionDAG &DAG) const {
430  SDLoc DL(Op);
431  EVT VT = Op.getValueType();
432
433  SDValue LHS = Op.getOperand(0);
434  SDValue RHS = Op.getOperand(1);
435  SDValue True = Op.getOperand(2);
436  SDValue False = Op.getOperand(3);
437  SDValue CC = Op.getOperand(4);
438
439  if (VT != MVT::f32 ||
440      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
441    return SDValue();
442  }
443
444  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
445  switch (CCOpcode) {
446  case ISD::SETOEQ:
447  case ISD::SETONE:
448  case ISD::SETUNE:
449  case ISD::SETNE:
450  case ISD::SETUEQ:
451  case ISD::SETEQ:
452  case ISD::SETFALSE:
453  case ISD::SETFALSE2:
454  case ISD::SETTRUE:
455  case ISD::SETTRUE2:
456  case ISD::SETUO:
457  case ISD::SETO:
458    assert(0 && "Operation should already be optimised !");
459  case ISD::SETULE:
460  case ISD::SETULT:
461  case ISD::SETOLE:
462  case ISD::SETOLT:
463  case ISD::SETLE:
464  case ISD::SETLT: {
465    if (LHS == True)
466      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
467    else
468      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
469  }
470  case ISD::SETGT:
471  case ISD::SETGE:
472  case ISD::SETUGE:
473  case ISD::SETOGE:
474  case ISD::SETUGT:
475  case ISD::SETOGT: {
476    if (LHS == True)
477      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
478    else
479      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
480  }
481  case ISD::SETCC_INVALID:
482    assert(0 && "Invalid setcc condcode !");
483  }
484  return Op;
485}
486
487SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
488                                              SelectionDAG &DAG) const {
489  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
490  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
491  EVT EltVT = Op.getValueType().getVectorElementType();
492  EVT PtrVT = Load->getBasePtr().getValueType();
493  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
494  SmallVector<SDValue, 8> Loads;
495  SDLoc SL(Op);
496
497  for (unsigned i = 0, e = NumElts; i != e; ++i) {
498    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
499                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
500    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
501                        Load->getChain(), Ptr,
502                        MachinePointerInfo(Load->getMemOperand()->getValue()),
503                        MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
504                        Load->getAlignment()));
505  }
506  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
507                     Loads.size());
508}
509
510SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
511                                               SelectionDAG &DAG) const {
512  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
513  EVT MemVT = Store->getMemoryVT();
514  unsigned MemBits = MemVT.getSizeInBits();
515
516  // Byte stores are really expensive, so if possible, try to pack
517  // 32-bit vector truncatating store into an i32 store.
518  // XXX: We could also handle optimize other vector bitwidths
519  if (!MemVT.isVector() || MemBits > 32) {
520    return SDValue();
521  }
522
523  SDLoc DL(Op);
524  const SDValue &Value = Store->getValue();
525  EVT VT = Value.getValueType();
526  const SDValue &Ptr = Store->getBasePtr();
527  EVT MemEltVT = MemVT.getVectorElementType();
528  unsigned MemEltBits = MemEltVT.getSizeInBits();
529  unsigned MemNumElements = MemVT.getVectorNumElements();
530  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
531  SDValue Mask;
532  switch(MemEltBits) {
533  case 8:
534    Mask = DAG.getConstant(0xFF, PackedVT);
535    break;
536  case 16:
537    Mask = DAG.getConstant(0xFFFF, PackedVT);
538    break;
539  default:
540    llvm_unreachable("Cannot lower this vector store");
541  }
542  SDValue PackedValue;
543  for (unsigned i = 0; i < MemNumElements; ++i) {
544    EVT ElemVT = VT.getVectorElementType();
545    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
546                              DAG.getConstant(i, MVT::i32));
547    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
548    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
549    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
550    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
551    if (i == 0) {
552      PackedValue = Elt;
553    } else {
554      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
555    }
556  }
557  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
558                      MachinePointerInfo(Store->getMemOperand()->getValue()),
559                      Store->isVolatile(),  Store->isNonTemporal(),
560                      Store->getAlignment());
561}
562
563SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
564                                            SelectionDAG &DAG) const {
565  StoreSDNode *Store = cast<StoreSDNode>(Op);
566  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
567  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
568  EVT PtrVT = Store->getBasePtr().getValueType();
569  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
570  SDLoc SL(Op);
571
572  SmallVector<SDValue, 8> Chains;
573
574  for (unsigned i = 0, e = NumElts; i != e; ++i) {
575    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
576                              Store->getValue(), DAG.getConstant(i, MVT::i32));
577    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
578                              Store->getBasePtr(),
579                            DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
580                                            PtrVT));
581    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
582                         MachinePointerInfo(Store->getMemOperand()->getValue()),
583                         MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
584                         Store->getAlignment()));
585  }
586  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
587}
588
589SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
590  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
591  if (Result.getNode()) {
592    return Result;
593  }
594
595  StoreSDNode *Store = cast<StoreSDNode>(Op);
596  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
597       Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
598      Store->getValue().getValueType().isVector()) {
599    return SplitVectorStore(Op, DAG);
600  }
601  return SDValue();
602}
603
/// Lower 32-bit unsigned UDIVREM without a hardware divider: compute an
/// approximate reciprocal with URECIP, estimate the quotient via a
/// high-multiply, then correct quotient and remainder with compare/select
/// fix-up steps.  Returns the merged {Div, Rem} values.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP =  URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu (RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETUGE);
  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
                                                  Num_S_Remainder,
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETUGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  // (all-ones iff the quotient estimate is one too small)
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                     Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
}
708
709SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
710                                               SelectionDAG &DAG) const {
711  SDValue S0 = Op.getOperand(0);
712  SDLoc DL(Op);
713  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
714    return SDValue();
715
716  // f32 uint_to_fp i64
717  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
718                           DAG.getConstant(0, MVT::i32));
719  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
720  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
721                           DAG.getConstant(1, MVT::i32));
722  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
723  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
724                        DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
725  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
726
727}
728
729//===----------------------------------------------------------------------===//
730// Helper functions
731//===----------------------------------------------------------------------===//
732
/// Reconstruct, in \p OrigIns, the argument list as it looked in the
/// original IR-level signature, undoing the splitting/promotion that type
/// legalization applied to produce \p Ins.
/// Note: \p DAG and \p F are currently unused by this implementation.
void AMDGPUTargetLowering::getOriginalFunctionArgs(
                               SelectionDAG &DAG,
                               const Function *F,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    // Unchanged by legalization: keep as-is.
    if (Ins[i].ArgVT == Ins[i].VT) {
      OrigIns.push_back(Ins[i]);
      continue;
    }

    EVT VT;
    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
      // Vector has been split into scalars.
      VT = Ins[i].ArgVT.getVectorElementType();
    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
               Ins[i].ArgVT.getVectorElementType() !=
               Ins[i].VT.getVectorElementType()) {
      // Vector elements have been promoted
      VT = Ins[i].ArgVT;
    } else {
      // Vector has been split into smaller vectors.
      VT = Ins[i].VT;
    }

    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
    OrigIns.push_back(Arg);
  }
}
764
765bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
766  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
767    return CFP->isExactlyValue(1.0);
768  }
769  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
770    return C->isAllOnesValue();
771  }
772  return false;
773}
774
775bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
776  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
777    return CFP->getValueAPF().isZero();
778  }
779  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
780    return C->isNullValue();
781  }
782  return false;
783}
784
785SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
786                                                  const TargetRegisterClass *RC,
787                                                   unsigned Reg, EVT VT) const {
788  MachineFunction &MF = DAG.getMachineFunction();
789  MachineRegisterInfo &MRI = MF.getRegInfo();
790  unsigned VirtualRegister;
791  if (!MRI.isLiveIn(Reg)) {
792    VirtualRegister = MRI.createVirtualRegister(RC);
793    MRI.addLiveIn(Reg, VirtualRegister);
794  } else {
795    VirtualRegister = MRI.getLiveInVirtReg(Reg);
796  }
797  return DAG.getRegister(VirtualRegister, VT);
798}
799
800#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
801
802const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
803  switch (Opcode) {
804  default: return 0;
805  // AMDIL DAG nodes
806  NODE_NAME_CASE(CALL);
807  NODE_NAME_CASE(UMUL);
808  NODE_NAME_CASE(DIV_INF);
809  NODE_NAME_CASE(RET_FLAG);
810  NODE_NAME_CASE(BRANCH_COND);
811
812  // AMDGPU DAG nodes
813  NODE_NAME_CASE(DWORDADDR)
814  NODE_NAME_CASE(FRACT)
815  NODE_NAME_CASE(FMAX)
816  NODE_NAME_CASE(SMAX)
817  NODE_NAME_CASE(UMAX)
818  NODE_NAME_CASE(FMIN)
819  NODE_NAME_CASE(SMIN)
820  NODE_NAME_CASE(UMIN)
821  NODE_NAME_CASE(URECIP)
822  NODE_NAME_CASE(EXPORT)
823  NODE_NAME_CASE(CONST_ADDRESS)
824  NODE_NAME_CASE(REGISTER_LOAD)
825  NODE_NAME_CASE(REGISTER_STORE)
826  NODE_NAME_CASE(LOAD_CONSTANT)
827  NODE_NAME_CASE(LOAD_INPUT)
828  NODE_NAME_CASE(SAMPLE)
829  NODE_NAME_CASE(SAMPLEB)
830  NODE_NAME_CASE(SAMPLED)
831  NODE_NAME_CASE(SAMPLEL)
832  NODE_NAME_CASE(STORE_MSKOR)
833  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
834  }
835}
836