AMDGPUISelLowering.cpp revision 263508
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

/// \brief Calling-convention helper referenced by the tablegen'd CC_AMDGPU
/// table: assign the argument to a stack slot sized from its value type and
/// aligned to the argument's original alignment.  Always succeeds (returns
/// true, meaning the value has been assigned a location).
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  // Slot size in bytes; the bit width is assumed to be a multiple of 8 here.
  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

  return true;
}

// Tablegen'd calling-convention dispatch (CC_AMDGPU).  Must be included after
// allocateStack, which the generated table references.
#include "AMDGPUGenCallingConv.inc"

/// \brief Configure the operation actions common to all AMDGPU hardware
/// targets (subclasses refine these per-generation).
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions. These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FPOW, MVT::f32, Legal);
  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
  setOperationAction(ISD::FABS, MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);
  setOperationAction(ISD::FROUND, MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Custom lowering of vector stores is required for local address space
  // stores.
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  // XXX: Native v2i32 local address space stores are possible, but not
  // currently implemented.
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be change to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  // Loads mirror the store configuration above: float loads are promoted to
  // same-width integer loads.
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  // No native extending vector loads; scalarize/expand them.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  // See LowerUINT_TO_FP for the i64 -> f32 sequence.
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  // i32 unsigned division/remainder are both funnelled through the custom
  // UDIVREM expansion (see LowerUDIVREM).
  setOperationAction(ISD::UDIV, MVT::i32, Expand);

  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    //Expand the following operations for the current type by default
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::AND, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::OR, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR, VT, Expand);
  }

  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  // Expand vector FP arithmetic to per-element scalar operations by default.
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}

193//===----------------------------------------------------------------------===// 194// Target Information 195//===----------------------------------------------------------------------===// 196 197MVT AMDGPUTargetLowering::getVectorIdxTy() const { 198 return MVT::i32; 199} 200 201bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, 202 EVT CastTy) const { 203 if (LoadTy.getSizeInBits() != CastTy.getSizeInBits()) 204 return true; 205 206 unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits(); 207 unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits(); 208 209 return ((LScalarSize <= CastScalarSize) || 210 (CastScalarSize >= 32) || 211 (LScalarSize < 32)); 212} 213 214//===---------------------------------------------------------------------===// 215// Target Properties 216//===---------------------------------------------------------------------===// 217 218bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 219 assert(VT.isFloatingPoint()); 220 return VT == MVT::f32; 221} 222 223bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 224 assert(VT.isFloatingPoint()); 225 return VT == MVT::f32; 226} 227 228//===---------------------------------------------------------------------===// 229// TargetLowering Callbacks 230//===---------------------------------------------------------------------===// 231 232void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 233 const SmallVectorImpl<ISD::InputArg> &Ins) const { 234 235 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 236} 237 238SDValue AMDGPUTargetLowering::LowerReturn( 239 SDValue Chain, 240 CallingConv::ID CallConv, 241 bool isVarArg, 242 const SmallVectorImpl<ISD::OutputArg> &Outs, 243 const SmallVectorImpl<SDValue> &OutVals, 244 SDLoc DL, SelectionDAG &DAG) const { 245 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 246} 247 248//===---------------------------------------------------------------------===// 249// Target specific lowering 
250//===---------------------------------------------------------------------===// 251 252SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 253 const { 254 switch (Op.getOpcode()) { 255 default: 256 Op.getNode()->dump(); 257 assert(0 && "Custom lowering code for this" 258 "instruction is not implemented yet!"); 259 break; 260 // AMDIL DAG lowering 261 case ISD::SDIV: return LowerSDIV(Op, DAG); 262 case ISD::SREM: return LowerSREM(Op, DAG); 263 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 264 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 265 // AMDGPU DAG lowering 266 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 267 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 268 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); 269 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 270 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 271 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 272 } 273 return Op; 274} 275 276SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 277 SDValue Op, 278 SelectionDAG &DAG) const { 279 280 const DataLayout *TD = getTargetMachine().getDataLayout(); 281 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 282 283 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); 284 // XXX: What does the value of G->getOffset() mean? 285 assert(G->getOffset() == 0 && 286 "Do not know what to do with an non-zero offset"); 287 288 const GlobalValue *GV = G->getGlobal(); 289 290 unsigned Offset; 291 if (MFI->LocalMemoryObjects.count(GV) == 0) { 292 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 293 Offset = MFI->LDSSize; 294 MFI->LocalMemoryObjects[GV] = Offset; 295 // XXX: Account for alignment? 
296 MFI->LDSSize += Size; 297 } else { 298 Offset = MFI->LocalMemoryObjects[GV]; 299 } 300 301 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); 302} 303 304void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 305 SmallVectorImpl<SDValue> &Args, 306 unsigned Start, 307 unsigned Count) const { 308 EVT VT = Op.getValueType(); 309 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 310 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 311 VT.getVectorElementType(), 312 Op, DAG.getConstant(i, MVT::i32))); 313 } 314} 315 316SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 317 SelectionDAG &DAG) const { 318 SmallVector<SDValue, 8> Args; 319 SDValue A = Op.getOperand(0); 320 SDValue B = Op.getOperand(1); 321 322 ExtractVectorElements(A, DAG, Args, 0, 323 A.getValueType().getVectorNumElements()); 324 ExtractVectorElements(B, DAG, Args, 0, 325 B.getValueType().getVectorNumElements()); 326 327 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 328 &Args[0], Args.size()); 329} 330 331SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 332 SelectionDAG &DAG) const { 333 334 SmallVector<SDValue, 8> Args; 335 EVT VT = Op.getValueType(); 336 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 337 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 338 VT.getVectorNumElements()); 339 340 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 341 &Args[0], Args.size()); 342} 343 344SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, 345 SelectionDAG &DAG) const { 346 347 MachineFunction &MF = DAG.getMachineFunction(); 348 const AMDGPUFrameLowering *TFL = 349 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); 350 351 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); 352 assert(FIN); 353 354 unsigned FrameIndex = FIN->getIndex(); 355 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); 356 
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), 357 Op.getValueType()); 358} 359 360SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 361 SelectionDAG &DAG) const { 362 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 363 SDLoc DL(Op); 364 EVT VT = Op.getValueType(); 365 366 switch (IntrinsicID) { 367 default: return Op; 368 case AMDGPUIntrinsic::AMDIL_abs: 369 return LowerIntrinsicIABS(Op, DAG); 370 case AMDGPUIntrinsic::AMDIL_exp: 371 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 372 case AMDGPUIntrinsic::AMDGPU_lrp: 373 return LowerIntrinsicLRP(Op, DAG); 374 case AMDGPUIntrinsic::AMDIL_fraction: 375 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 376 case AMDGPUIntrinsic::AMDIL_max: 377 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 378 Op.getOperand(2)); 379 case AMDGPUIntrinsic::AMDGPU_imax: 380 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 381 Op.getOperand(2)); 382 case AMDGPUIntrinsic::AMDGPU_umax: 383 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 384 Op.getOperand(2)); 385 case AMDGPUIntrinsic::AMDIL_min: 386 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 387 Op.getOperand(2)); 388 case AMDGPUIntrinsic::AMDGPU_imin: 389 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 390 Op.getOperand(2)); 391 case AMDGPUIntrinsic::AMDGPU_umin: 392 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 393 Op.getOperand(2)); 394 case AMDGPUIntrinsic::AMDIL_round_nearest: 395 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 396 } 397} 398 399///IABS(a) = SMAX(sub(0, a), a) 400SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 401 SelectionDAG &DAG) const { 402 403 SDLoc DL(Op); 404 EVT VT = Op.getValueType(); 405 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 406 Op.getOperand(1)); 407 408 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 409} 410 
411/// Linear Interpolation 412/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 413SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 414 SelectionDAG &DAG) const { 415 SDLoc DL(Op); 416 EVT VT = Op.getValueType(); 417 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 418 DAG.getConstantFP(1.0f, MVT::f32), 419 Op.getOperand(1)); 420 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 421 Op.getOperand(3)); 422 return DAG.getNode(ISD::FADD, DL, VT, 423 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 424 OneSubAC); 425} 426 427/// \brief Generate Min/Max node 428SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 429 SelectionDAG &DAG) const { 430 SDLoc DL(Op); 431 EVT VT = Op.getValueType(); 432 433 SDValue LHS = Op.getOperand(0); 434 SDValue RHS = Op.getOperand(1); 435 SDValue True = Op.getOperand(2); 436 SDValue False = Op.getOperand(3); 437 SDValue CC = Op.getOperand(4); 438 439 if (VT != MVT::f32 || 440 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 441 return SDValue(); 442 } 443 444 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 445 switch (CCOpcode) { 446 case ISD::SETOEQ: 447 case ISD::SETONE: 448 case ISD::SETUNE: 449 case ISD::SETNE: 450 case ISD::SETUEQ: 451 case ISD::SETEQ: 452 case ISD::SETFALSE: 453 case ISD::SETFALSE2: 454 case ISD::SETTRUE: 455 case ISD::SETTRUE2: 456 case ISD::SETUO: 457 case ISD::SETO: 458 assert(0 && "Operation should already be optimised !"); 459 case ISD::SETULE: 460 case ISD::SETULT: 461 case ISD::SETOLE: 462 case ISD::SETOLT: 463 case ISD::SETLE: 464 case ISD::SETLT: { 465 if (LHS == True) 466 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 467 else 468 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 469 } 470 case ISD::SETGT: 471 case ISD::SETGE: 472 case ISD::SETUGE: 473 case ISD::SETOGE: 474 case ISD::SETUGT: 475 case ISD::SETOGT: { 476 if (LHS == True) 477 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 478 else 479 return 
DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 480 } 481 case ISD::SETCC_INVALID: 482 assert(0 && "Invalid setcc condcode !"); 483 } 484 return Op; 485} 486 487SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, 488 SelectionDAG &DAG) const { 489 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); 490 EVT MemEltVT = Load->getMemoryVT().getVectorElementType(); 491 EVT EltVT = Op.getValueType().getVectorElementType(); 492 EVT PtrVT = Load->getBasePtr().getValueType(); 493 unsigned NumElts = Load->getMemoryVT().getVectorNumElements(); 494 SmallVector<SDValue, 8> Loads; 495 SDLoc SL(Op); 496 497 for (unsigned i = 0, e = NumElts; i != e; ++i) { 498 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(), 499 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); 500 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT, 501 Load->getChain(), Ptr, 502 MachinePointerInfo(Load->getMemOperand()->getValue()), 503 MemEltVT, Load->isVolatile(), Load->isNonTemporal(), 504 Load->getAlignment())); 505 } 506 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0], 507 Loads.size()); 508} 509 510SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, 511 SelectionDAG &DAG) const { 512 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 513 EVT MemVT = Store->getMemoryVT(); 514 unsigned MemBits = MemVT.getSizeInBits(); 515 516 // Byte stores are really expensive, so if possible, try to pack 517 // 32-bit vector truncatating store into an i32 store. 
518 // XXX: We could also handle optimize other vector bitwidths 519 if (!MemVT.isVector() || MemBits > 32) { 520 return SDValue(); 521 } 522 523 SDLoc DL(Op); 524 const SDValue &Value = Store->getValue(); 525 EVT VT = Value.getValueType(); 526 const SDValue &Ptr = Store->getBasePtr(); 527 EVT MemEltVT = MemVT.getVectorElementType(); 528 unsigned MemEltBits = MemEltVT.getSizeInBits(); 529 unsigned MemNumElements = MemVT.getVectorNumElements(); 530 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 531 SDValue Mask; 532 switch(MemEltBits) { 533 case 8: 534 Mask = DAG.getConstant(0xFF, PackedVT); 535 break; 536 case 16: 537 Mask = DAG.getConstant(0xFFFF, PackedVT); 538 break; 539 default: 540 llvm_unreachable("Cannot lower this vector store"); 541 } 542 SDValue PackedValue; 543 for (unsigned i = 0; i < MemNumElements; ++i) { 544 EVT ElemVT = VT.getVectorElementType(); 545 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 546 DAG.getConstant(i, MVT::i32)); 547 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 548 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 549 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 550 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 551 if (i == 0) { 552 PackedValue = Elt; 553 } else { 554 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 555 } 556 } 557 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 558 MachinePointerInfo(Store->getMemOperand()->getValue()), 559 Store->isVolatile(), Store->isNonTemporal(), 560 Store->getAlignment()); 561} 562 563SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 564 SelectionDAG &DAG) const { 565 StoreSDNode *Store = cast<StoreSDNode>(Op); 566 EVT MemEltVT = Store->getMemoryVT().getVectorElementType(); 567 EVT EltVT = Store->getValue().getValueType().getVectorElementType(); 568 EVT PtrVT = Store->getBasePtr().getValueType(); 569 unsigned NumElts = Store->getMemoryVT().getVectorNumElements(); 570 SDLoc 
SL(Op); 571 572 SmallVector<SDValue, 8> Chains; 573 574 for (unsigned i = 0, e = NumElts; i != e; ++i) { 575 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, 576 Store->getValue(), DAG.getConstant(i, MVT::i32)); 577 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, 578 Store->getBasePtr(), 579 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), 580 PtrVT)); 581 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, 582 MachinePointerInfo(Store->getMemOperand()->getValue()), 583 MemEltVT, Store->isVolatile(), Store->isNonTemporal(), 584 Store->getAlignment())); 585 } 586 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); 587} 588 589SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 590 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); 591 if (Result.getNode()) { 592 return Result; 593 } 594 595 StoreSDNode *Store = cast<StoreSDNode>(Op); 596 if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 597 Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 598 Store->getValue().getValueType().isVector()) { 599 return SplitVectorStore(Op, DAG); 600 } 601 return SDValue(); 602} 603 604SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 605 SelectionDAG &DAG) const { 606 SDLoc DL(Op); 607 EVT VT = Op.getValueType(); 608 609 SDValue Num = Op.getOperand(0); 610 SDValue Den = Op.getOperand(1); 611 612 SmallVector<SDValue, 8> Results; 613 614 // RCP = URECIP(Den) = 2^32 / Den + e 615 // e is rounding error. 616 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 617 618 // RCP_LO = umulo(RCP, Den) */ 619 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 620 621 // RCP_HI = mulhu (RCP, Den) */ 622 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 623 624 // NEG_RCP_LO = -RCP_LO 625 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 626 RCP_LO); 627 628 // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) 629 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 630 NEG_RCP_LO, RCP_LO, 631 ISD::SETEQ); 632 // Calculate the rounding error from the URECIP instruction 633 // E = mulhu(ABS_RCP_LO, RCP) 634 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 635 636 // RCP_A_E = RCP + E 637 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 638 639 // RCP_S_E = RCP - E 640 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 641 642 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 643 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 644 RCP_A_E, RCP_S_E, 645 ISD::SETEQ); 646 // Quotient = mulhu(Tmp0, Num) 647 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 648 649 // Num_S_Remainder = Quotient * Den 650 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 651 652 // Remainder = Num - Num_S_Remainder 653 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 654 655 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 656 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 657 DAG.getConstant(-1, VT), 658 DAG.getConstant(0, VT), 659 ISD::SETUGE); 660 // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) 661 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, 662 Num_S_Remainder, 663 DAG.getConstant(-1, VT), 664 DAG.getConstant(0, VT), 665 ISD::SETUGE); 666 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 667 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 668 Remainder_GE_Zero); 669 670 // Calculate Division result: 671 672 // Quotient_A_One = Quotient + 1 673 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 674 DAG.getConstant(1, VT)); 675 676 // Quotient_S_One = Quotient - 1 677 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 678 DAG.getConstant(1, VT)); 679 680 // Div = (Tmp1 == 0 ? 
Quotient : Quotient_A_One) 681 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 682 Quotient, Quotient_A_One, ISD::SETEQ); 683 684 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 685 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 686 Quotient_S_One, Div, ISD::SETEQ); 687 688 // Calculate Rem result: 689 690 // Remainder_S_Den = Remainder - Den 691 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 692 693 // Remainder_A_Den = Remainder + Den 694 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 695 696 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 697 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 698 Remainder, Remainder_S_Den, ISD::SETEQ); 699 700 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 701 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 702 Remainder_A_Den, Rem, ISD::SETEQ); 703 SDValue Ops[2]; 704 Ops[0] = Div; 705 Ops[1] = Rem; 706 return DAG.getMergeValues(Ops, 2, DL); 707} 708 709SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 710 SelectionDAG &DAG) const { 711 SDValue S0 = Op.getOperand(0); 712 SDLoc DL(Op); 713 if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64) 714 return SDValue(); 715 716 // f32 uint_to_fp i64 717 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 718 DAG.getConstant(0, MVT::i32)); 719 SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); 720 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 721 DAG.getConstant(1, MVT::i32)); 722 SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); 723 FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, 724 DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32 725 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); 726 727} 728 729//===----------------------------------------------------------------------===// 730// Helper functions 
731//===----------------------------------------------------------------------===// 732 733void AMDGPUTargetLowering::getOriginalFunctionArgs( 734 SelectionDAG &DAG, 735 const Function *F, 736 const SmallVectorImpl<ISD::InputArg> &Ins, 737 SmallVectorImpl<ISD::InputArg> &OrigIns) const { 738 739 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 740 if (Ins[i].ArgVT == Ins[i].VT) { 741 OrigIns.push_back(Ins[i]); 742 continue; 743 } 744 745 EVT VT; 746 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) { 747 // Vector has been split into scalars. 748 VT = Ins[i].ArgVT.getVectorElementType(); 749 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() && 750 Ins[i].ArgVT.getVectorElementType() != 751 Ins[i].VT.getVectorElementType()) { 752 // Vector elements have been promoted 753 VT = Ins[i].ArgVT; 754 } else { 755 // Vector has been spilt into smaller vectors. 756 VT = Ins[i].VT; 757 } 758 759 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used, 760 Ins[i].OrigArgIndex, Ins[i].PartOffset); 761 OrigIns.push_back(Arg); 762 } 763} 764 765bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 766 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 767 return CFP->isExactlyValue(1.0); 768 } 769 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 770 return C->isAllOnesValue(); 771 } 772 return false; 773} 774 775bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 776 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 777 return CFP->getValueAPF().isZero(); 778 } 779 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 780 return C->isNullValue(); 781 } 782 return false; 783} 784 785SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 786 const TargetRegisterClass *RC, 787 unsigned Reg, EVT VT) const { 788 MachineFunction &MF = DAG.getMachineFunction(); 789 MachineRegisterInfo &MRI = MF.getRegInfo(); 790 unsigned VirtualRegister; 791 if (!MRI.isLiveIn(Reg)) { 792 VirtualRegister = 
MRI.createVirtualRegister(RC); 793 MRI.addLiveIn(Reg, VirtualRegister); 794 } else { 795 VirtualRegister = MRI.getLiveInVirtReg(Reg); 796 } 797 return DAG.getRegister(VirtualRegister, VT); 798} 799 800#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 801 802const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 803 switch (Opcode) { 804 default: return 0; 805 // AMDIL DAG nodes 806 NODE_NAME_CASE(CALL); 807 NODE_NAME_CASE(UMUL); 808 NODE_NAME_CASE(DIV_INF); 809 NODE_NAME_CASE(RET_FLAG); 810 NODE_NAME_CASE(BRANCH_COND); 811 812 // AMDGPU DAG nodes 813 NODE_NAME_CASE(DWORDADDR) 814 NODE_NAME_CASE(FRACT) 815 NODE_NAME_CASE(FMAX) 816 NODE_NAME_CASE(SMAX) 817 NODE_NAME_CASE(UMAX) 818 NODE_NAME_CASE(FMIN) 819 NODE_NAME_CASE(SMIN) 820 NODE_NAME_CASE(UMIN) 821 NODE_NAME_CASE(URECIP) 822 NODE_NAME_CASE(EXPORT) 823 NODE_NAME_CASE(CONST_ADDRESS) 824 NODE_NAME_CASE(REGISTER_LOAD) 825 NODE_NAME_CASE(REGISTER_STORE) 826 NODE_NAME_CASE(LOAD_CONSTANT) 827 NODE_NAME_CASE(LOAD_INPUT) 828 NODE_NAME_CASE(SAMPLE) 829 NODE_NAME_CASE(SAMPLEB) 830 NODE_NAME_CASE(SAMPLED) 831 NODE_NAME_CASE(SAMPLEL) 832 NODE_NAME_CASE(STORE_MSKOR) 833 NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 834 } 835} 836