//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types.  For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
27193323Sed// 28193323Sed//===----------------------------------------------------------------------===// 29193323Sed 30193323Sed#include "llvm/CodeGen/SelectionDAG.h" 31193323Sed#include "llvm/Target/TargetLowering.h" 32193323Sedusing namespace llvm; 33193323Sed 34193323Sednamespace { 35193323Sedclass VectorLegalizer { 36193323Sed SelectionDAG& DAG; 37207618Srdivacky const TargetLowering &TLI; 38193323Sed bool Changed; // Keep track of whether anything changed 39193323Sed 40193323Sed /// LegalizedNodes - For nodes that are of legal width, and that have more 41193323Sed /// than one use, this map indicates what regularized operand to use. This 42193323Sed /// allows us to avoid legalizing the same thing more than once. 43249423Sdim SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; 44193323Sed 45193323Sed // Adds a node to the translation cache 46193323Sed void AddLegalizedOperand(SDValue From, SDValue To) { 47193323Sed LegalizedNodes.insert(std::make_pair(From, To)); 48193323Sed // If someone requests legalization of the new node, return itself. 49193323Sed if (From != To) 50193323Sed LegalizedNodes.insert(std::make_pair(To, To)); 51193323Sed } 52193323Sed 53193323Sed // Legalizes the given node 54193323Sed SDValue LegalizeOp(SDValue Op); 55193323Sed // Assuming the node is legal, "legalize" the results 56193323Sed SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); 57193323Sed // Implements unrolling a VSETCC. 58193323Sed SDValue UnrollVSETCC(SDValue Op); 59193323Sed // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB 60193323Sed // isn't legal. 61221345Sdim // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if 62221345Sdim // SINT_TO_FLOAT and SHR on vectors isn't legal. 63221345Sdim SDValue ExpandUINT_TO_FLOAT(SDValue Op); 64249423Sdim // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. 
65249423Sdim SDValue ExpandSEXTINREG(SDValue Op); 66226633Sdim // Implement vselect in terms of XOR, AND, OR when blend is not supported 67226633Sdim // by the target. 68226633Sdim SDValue ExpandVSELECT(SDValue Op); 69243830Sdim SDValue ExpandSELECT(SDValue Op); 70234353Sdim SDValue ExpandLoad(SDValue Op); 71234353Sdim SDValue ExpandStore(SDValue Op); 72193323Sed SDValue ExpandFNEG(SDValue Op); 73193323Sed // Implements vector promotion; this is essentially just bitcasting the 74193323Sed // operands to a different type and bitcasting the result back to the 75193323Sed // original type. 76193323Sed SDValue PromoteVectorOp(SDValue Op); 77239462Sdim // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input 78239462Sdim // operand to the next size up. 79239462Sdim SDValue PromoteVectorOpINT_TO_FP(SDValue Op); 80193323Sed 81193323Sed public: 82193323Sed bool Run(); 83193323Sed VectorLegalizer(SelectionDAG& dag) : 84193323Sed DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} 85193323Sed}; 86193323Sed 87193323Sedbool VectorLegalizer::Run() { 88249423Sdim // Before we start legalizing vector nodes, check if there are any vectors. 89249423Sdim bool HasVectors = false; 90249423Sdim for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 91249423Sdim E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { 92249423Sdim // Check if the values of the nodes contain vectors. We don't need to check 93249423Sdim // the operands because we are going to check their values at some point. 94249423Sdim for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); 95249423Sdim J != E; ++J) 96249423Sdim HasVectors |= J->isVector(); 97249423Sdim 98249423Sdim // If we found a vector node we can start the legalization. 99249423Sdim if (HasVectors) 100249423Sdim break; 101249423Sdim } 102249423Sdim 103249423Sdim // If this basic block has no vectors then no need to legalize vectors. 
104249423Sdim if (!HasVectors) 105249423Sdim return false; 106249423Sdim 107193323Sed // The legalize process is inherently a bottom-up recursive process (users 108193323Sed // legalize their uses before themselves). Given infinite stack space, we 109193323Sed // could just start legalizing on the root and traverse the whole graph. In 110193323Sed // practice however, this causes us to run out of stack space on large basic 111193323Sed // blocks. To avoid this problem, compute an ordering of the nodes where each 112193323Sed // node is only legalized after all of its operands are legalized. 113193323Sed DAG.AssignTopologicalOrder(); 114193323Sed for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 115200581Srdivacky E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) 116193323Sed LegalizeOp(SDValue(I, 0)); 117193323Sed 118193323Sed // Finally, it's possible the root changed. Get the new root. 119193323Sed SDValue OldRoot = DAG.getRoot(); 120193323Sed assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); 121193323Sed DAG.setRoot(LegalizedNodes[OldRoot]); 122193323Sed 123193323Sed LegalizedNodes.clear(); 124193323Sed 125193323Sed // Remove dead nodes now. 126193323Sed DAG.RemoveDeadNodes(); 127193323Sed 128193323Sed return Changed; 129193323Sed} 130193323Sed 131193323SedSDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { 132193323Sed // Generic legalization: just pass the operand through. 133193323Sed for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) 134193323Sed AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); 135193323Sed return Result.getValue(Op.getResNo()); 136193323Sed} 137193323Sed 138193323SedSDValue VectorLegalizer::LegalizeOp(SDValue Op) { 139193323Sed // Note that LegalizeOp may be reentered even from single-use nodes, which 140193323Sed // means that we always must cache transformed nodes. 
141193323Sed DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); 142193323Sed if (I != LegalizedNodes.end()) return I->second; 143193323Sed 144193323Sed SDNode* Node = Op.getNode(); 145193323Sed 146193323Sed // Legalize the operands 147193323Sed SmallVector<SDValue, 8> Ops; 148193323Sed for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) 149193323Sed Ops.push_back(LegalizeOp(Node->getOperand(i))); 150193323Sed 151193323Sed SDValue Result = 152210299Sed SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); 153193323Sed 154234353Sdim if (Op.getOpcode() == ISD::LOAD) { 155234353Sdim LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); 156234353Sdim ISD::LoadExtType ExtType = LD->getExtensionType(); 157234353Sdim if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { 158234353Sdim if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) 159234353Sdim return TranslateLegalizeResults(Op, Result); 160234353Sdim Changed = true; 161234353Sdim return LegalizeOp(ExpandLoad(Op)); 162234353Sdim } 163234353Sdim } else if (Op.getOpcode() == ISD::STORE) { 164234353Sdim StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); 165234353Sdim EVT StVT = ST->getMemoryVT(); 166249423Sdim MVT ValVT = ST->getValue().getSimpleValueType(); 167234353Sdim if (StVT.isVector() && ST->isTruncatingStore()) 168249423Sdim switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { 169234353Sdim default: llvm_unreachable("This action is not supported yet!"); 170234353Sdim case TargetLowering::Legal: 171234353Sdim return TranslateLegalizeResults(Op, Result); 172234353Sdim case TargetLowering::Custom: 173234353Sdim Changed = true; 174263508Sdim return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); 175234353Sdim case TargetLowering::Expand: 176234353Sdim Changed = true; 177234353Sdim return LegalizeOp(ExpandStore(Op)); 178234353Sdim } 179234353Sdim } 180234353Sdim 181193323Sed bool HasVectorValue = false; 182193323Sed for 
(SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); 183193323Sed J != E; 184193323Sed ++J) 185193323Sed HasVectorValue |= J->isVector(); 186193323Sed if (!HasVectorValue) 187193323Sed return TranslateLegalizeResults(Op, Result); 188193323Sed 189198090Srdivacky EVT QueryType; 190193323Sed switch (Op.getOpcode()) { 191193323Sed default: 192193323Sed return TranslateLegalizeResults(Op, Result); 193193323Sed case ISD::ADD: 194193323Sed case ISD::SUB: 195193323Sed case ISD::MUL: 196193323Sed case ISD::SDIV: 197193323Sed case ISD::UDIV: 198193323Sed case ISD::SREM: 199193323Sed case ISD::UREM: 200193323Sed case ISD::FADD: 201193323Sed case ISD::FSUB: 202193323Sed case ISD::FMUL: 203193323Sed case ISD::FDIV: 204193323Sed case ISD::FREM: 205193323Sed case ISD::AND: 206193323Sed case ISD::OR: 207193323Sed case ISD::XOR: 208193323Sed case ISD::SHL: 209193323Sed case ISD::SRA: 210193323Sed case ISD::SRL: 211193323Sed case ISD::ROTL: 212193323Sed case ISD::ROTR: 213266715Sdim case ISD::BSWAP: 214234353Sdim case ISD::CTLZ: 215193323Sed case ISD::CTTZ: 216234353Sdim case ISD::CTLZ_ZERO_UNDEF: 217234353Sdim case ISD::CTTZ_ZERO_UNDEF: 218193323Sed case ISD::CTPOP: 219193323Sed case ISD::SELECT: 220226633Sdim case ISD::VSELECT: 221193323Sed case ISD::SELECT_CC: 222226633Sdim case ISD::SETCC: 223193323Sed case ISD::ZERO_EXTEND: 224193323Sed case ISD::ANY_EXTEND: 225193323Sed case ISD::TRUNCATE: 226193323Sed case ISD::SIGN_EXTEND: 227193323Sed case ISD::FP_TO_SINT: 228193323Sed case ISD::FP_TO_UINT: 229193323Sed case ISD::FNEG: 230193323Sed case ISD::FABS: 231263508Sdim case ISD::FCOPYSIGN: 232193323Sed case ISD::FSQRT: 233193323Sed case ISD::FSIN: 234193323Sed case ISD::FCOS: 235193323Sed case ISD::FPOWI: 236193323Sed case ISD::FPOW: 237193323Sed case ISD::FLOG: 238193323Sed case ISD::FLOG2: 239193323Sed case ISD::FLOG10: 240193323Sed case ISD::FEXP: 241193323Sed case ISD::FEXP2: 242193323Sed case ISD::FCEIL: 243193323Sed case ISD::FTRUNC: 244193323Sed case 
ISD::FRINT: 245193323Sed case ISD::FNEARBYINT: 246263508Sdim case ISD::FROUND: 247193323Sed case ISD::FFLOOR: 248249423Sdim case ISD::FP_ROUND: 249249423Sdim case ISD::FP_EXTEND: 250243830Sdim case ISD::FMA: 251224145Sdim case ISD::SIGN_EXTEND_INREG: 252193574Sed QueryType = Node->getValueType(0); 253193323Sed break; 254202375Srdivacky case ISD::FP_ROUND_INREG: 255202375Srdivacky QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); 256202375Srdivacky break; 257193574Sed case ISD::SINT_TO_FP: 258193574Sed case ISD::UINT_TO_FP: 259193574Sed QueryType = Node->getOperand(0).getValueType(); 260193574Sed break; 261193323Sed } 262193323Sed 263193574Sed switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { 264193323Sed case TargetLowering::Promote: 265239462Sdim switch (Op.getOpcode()) { 266239462Sdim default: 267239462Sdim // "Promote" the operation by bitcasting 268239462Sdim Result = PromoteVectorOp(Op); 269239462Sdim Changed = true; 270239462Sdim break; 271239462Sdim case ISD::SINT_TO_FP: 272239462Sdim case ISD::UINT_TO_FP: 273239462Sdim // "Promote" the operation by extending the operand. 
274239462Sdim Result = PromoteVectorOpINT_TO_FP(Op); 275239462Sdim Changed = true; 276239462Sdim break; 277239462Sdim } 278193323Sed break; 279193323Sed case TargetLowering::Legal: break; 280193323Sed case TargetLowering::Custom: { 281193323Sed SDValue Tmp1 = TLI.LowerOperation(Op, DAG); 282193323Sed if (Tmp1.getNode()) { 283193323Sed Result = Tmp1; 284193323Sed break; 285193323Sed } 286193323Sed // FALL THROUGH 287193323Sed } 288193323Sed case TargetLowering::Expand: 289249423Sdim if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) 290249423Sdim Result = ExpandSEXTINREG(Op); 291249423Sdim else if (Node->getOpcode() == ISD::VSELECT) 292226633Sdim Result = ExpandVSELECT(Op); 293243830Sdim else if (Node->getOpcode() == ISD::SELECT) 294243830Sdim Result = ExpandSELECT(Op); 295226633Sdim else if (Node->getOpcode() == ISD::UINT_TO_FP) 296221345Sdim Result = ExpandUINT_TO_FLOAT(Op); 297221345Sdim else if (Node->getOpcode() == ISD::FNEG) 298193323Sed Result = ExpandFNEG(Op); 299226633Sdim else if (Node->getOpcode() == ISD::SETCC) 300193323Sed Result = UnrollVSETCC(Op); 301193323Sed else 302199989Srdivacky Result = DAG.UnrollVectorOp(Op.getNode()); 303193323Sed break; 304193323Sed } 305193323Sed 306193323Sed // Make sure that the generated code is itself legal. 307193323Sed if (Result != Op) { 308193323Sed Result = LegalizeOp(Result); 309193323Sed Changed = true; 310193323Sed } 311193323Sed 312193323Sed // Note that LegalizeOp may be reentered even from single-use nodes, which 313193323Sed // means that we always must cache transformed nodes. 314193323Sed AddLegalizedOperand(Op, Result); 315193323Sed return Result; 316193323Sed} 317193323Sed 318193323SedSDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { 319193323Sed // Vector "promotion" is basically just bitcasting and doing the operation 320193323Sed // in a different type. For example, x86 promotes ISD::AND on v2i32 to 321193323Sed // v1i64. 
322249423Sdim MVT VT = Op.getSimpleValueType(); 323193323Sed assert(Op.getNode()->getNumValues() == 1 && 324193323Sed "Can't promote a vector with multiple results!"); 325249423Sdim MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); 326263508Sdim SDLoc dl(Op); 327193323Sed SmallVector<SDValue, 4> Operands(Op.getNumOperands()); 328193323Sed 329193323Sed for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 330193323Sed if (Op.getOperand(j).getValueType().isVector()) 331218893Sdim Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); 332193323Sed else 333193323Sed Operands[j] = Op.getOperand(j); 334193323Sed } 335193323Sed 336193323Sed Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); 337193323Sed 338218893Sdim return DAG.getNode(ISD::BITCAST, dl, VT, Op); 339193323Sed} 340193323Sed 341239462SdimSDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { 342239462Sdim // INT_TO_FP operations may require the input operand be promoted even 343239462Sdim // when the type is otherwise legal. 344239462Sdim EVT VT = Op.getOperand(0).getValueType(); 345239462Sdim assert(Op.getNode()->getNumValues() == 1 && 346239462Sdim "Can't promote a vector with multiple results!"); 347234353Sdim 348239462Sdim // Normal getTypeToPromoteTo() doesn't work here, as that will promote 349239462Sdim // by widening the vector w/ the same element width and twice the number 350239462Sdim // of elements. We want the other way around, the same number of elements, 351239462Sdim // each twice the width. 352239462Sdim // 353239462Sdim // Increase the bitwidth of the element to the next pow-of-two 354239462Sdim // (which is greater than 8 bits). 
355239462Sdim unsigned NumElts = VT.getVectorNumElements(); 356239462Sdim EVT EltVT = VT.getVectorElementType(); 357239462Sdim EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); 358239462Sdim assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); 359239462Sdim 360239462Sdim // Build a new vector type and check if it is legal. 361239462Sdim MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); 362239462Sdim 363263508Sdim SDLoc dl(Op); 364239462Sdim SmallVector<SDValue, 4> Operands(Op.getNumOperands()); 365239462Sdim 366239462Sdim unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : 367239462Sdim ISD::SIGN_EXTEND; 368239462Sdim for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 369239462Sdim if (Op.getOperand(j).getValueType().isVector()) 370239462Sdim Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); 371239462Sdim else 372239462Sdim Operands[j] = Op.getOperand(j); 373239462Sdim } 374239462Sdim 375239462Sdim return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], 376239462Sdim Operands.size()); 377239462Sdim} 378239462Sdim 379239462Sdim 380234353SdimSDValue VectorLegalizer::ExpandLoad(SDValue Op) { 381263508Sdim SDLoc dl(Op); 382234353Sdim LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); 383234353Sdim SDValue Chain = LD->getChain(); 384234353Sdim SDValue BasePTR = LD->getBasePtr(); 385234353Sdim EVT SrcVT = LD->getMemoryVT(); 386234353Sdim ISD::LoadExtType ExtType = LD->getExtensionType(); 387234353Sdim 388249423Sdim SmallVector<SDValue, 8> Vals; 389234353Sdim SmallVector<SDValue, 8> LoadChains; 390234353Sdim unsigned NumElem = SrcVT.getVectorNumElements(); 391234353Sdim 392249423Sdim EVT SrcEltVT = SrcVT.getScalarType(); 393249423Sdim EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); 394234353Sdim 395249423Sdim if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { 396249423Sdim // When elements in a vector is not byte-addressable, we cannot directly 
397249423Sdim // load each element by advancing pointer, which could only address bytes. 398249423Sdim // Instead, we load all significant words, mask bits off, and concatenate 399249423Sdim // them to form each element. Finally, they are extended to destination 400249423Sdim // scalar type to build the destination vector. 401249423Sdim EVT WideVT = TLI.getPointerTy(); 402234353Sdim 403249423Sdim assert(WideVT.isRound() && 404249423Sdim "Could not handle the sophisticated case when the widest integer is" 405249423Sdim " not power of 2."); 406249423Sdim assert(WideVT.bitsGE(SrcEltVT) && 407249423Sdim "Type is not legalized?"); 408249423Sdim 409249423Sdim unsigned WideBytes = WideVT.getStoreSize(); 410249423Sdim unsigned Offset = 0; 411249423Sdim unsigned RemainingBytes = SrcVT.getStoreSize(); 412249423Sdim SmallVector<SDValue, 8> LoadVals; 413249423Sdim 414249423Sdim while (RemainingBytes > 0) { 415249423Sdim SDValue ScalarLoad; 416249423Sdim unsigned LoadBytes = WideBytes; 417249423Sdim 418249423Sdim if (RemainingBytes >= LoadBytes) { 419249423Sdim ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, 420249423Sdim LD->getPointerInfo().getWithOffset(Offset), 421249423Sdim LD->isVolatile(), LD->isNonTemporal(), 422263508Sdim LD->isInvariant(), LD->getAlignment(), 423263508Sdim LD->getTBAAInfo()); 424249423Sdim } else { 425249423Sdim EVT LoadVT = WideVT; 426249423Sdim while (RemainingBytes < LoadBytes) { 427249423Sdim LoadBytes >>= 1; // Reduce the load size by half. 
428249423Sdim LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); 429249423Sdim } 430249423Sdim ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, 431249423Sdim LD->getPointerInfo().getWithOffset(Offset), 432249423Sdim LoadVT, LD->isVolatile(), 433263508Sdim LD->isNonTemporal(), LD->getAlignment(), 434263508Sdim LD->getTBAAInfo()); 435249423Sdim } 436249423Sdim 437249423Sdim RemainingBytes -= LoadBytes; 438249423Sdim Offset += LoadBytes; 439249423Sdim BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, 440263508Sdim DAG.getConstant(LoadBytes, BasePTR.getValueType())); 441249423Sdim 442249423Sdim LoadVals.push_back(ScalarLoad.getValue(0)); 443249423Sdim LoadChains.push_back(ScalarLoad.getValue(1)); 444249423Sdim } 445249423Sdim 446249423Sdim // Extract bits, pack and extend/trunc them into destination type. 447249423Sdim unsigned SrcEltBits = SrcEltVT.getSizeInBits(); 448249423Sdim SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); 449249423Sdim 450249423Sdim unsigned BitOffset = 0; 451249423Sdim unsigned WideIdx = 0; 452249423Sdim unsigned WideBits = WideVT.getSizeInBits(); 453249423Sdim 454249423Sdim for (unsigned Idx = 0; Idx != NumElem; ++Idx) { 455249423Sdim SDValue Lo, Hi, ShAmt; 456249423Sdim 457249423Sdim if (BitOffset < WideBits) { 458249423Sdim ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); 459249423Sdim Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); 460249423Sdim Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); 461249423Sdim } 462249423Sdim 463249423Sdim BitOffset += SrcEltBits; 464249423Sdim if (BitOffset >= WideBits) { 465249423Sdim WideIdx++; 466249423Sdim Offset -= WideBits; 467249423Sdim if (Offset > 0) { 468249423Sdim ShAmt = DAG.getConstant(SrcEltBits - Offset, 469249423Sdim TLI.getShiftAmountTy(WideVT)); 470249423Sdim Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); 471249423Sdim Hi = DAG.getNode(ISD::AND, dl, 
WideVT, Hi, SrcEltBitMask); 472249423Sdim } 473249423Sdim } 474249423Sdim 475249423Sdim if (Hi.getNode()) 476249423Sdim Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); 477249423Sdim 478249423Sdim switch (ExtType) { 479249423Sdim default: llvm_unreachable("Unknown extended-load op!"); 480249423Sdim case ISD::EXTLOAD: 481249423Sdim Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); 482249423Sdim break; 483249423Sdim case ISD::ZEXTLOAD: 484249423Sdim Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); 485249423Sdim break; 486249423Sdim case ISD::SEXTLOAD: 487249423Sdim ShAmt = DAG.getConstant(WideBits - SrcEltBits, 488249423Sdim TLI.getShiftAmountTy(WideVT)); 489249423Sdim Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); 490249423Sdim Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); 491249423Sdim Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); 492249423Sdim break; 493249423Sdim } 494249423Sdim Vals.push_back(Lo); 495249423Sdim } 496249423Sdim } else { 497249423Sdim unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; 498249423Sdim 499249423Sdim for (unsigned Idx=0; Idx<NumElem; Idx++) { 500249423Sdim SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, 501249423Sdim Op.getNode()->getValueType(0).getScalarType(), 502249423Sdim Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), 503249423Sdim SrcVT.getScalarType(), 504249423Sdim LD->isVolatile(), LD->isNonTemporal(), 505263508Sdim LD->getAlignment(), LD->getTBAAInfo()); 506249423Sdim 507249423Sdim BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, 508263508Sdim DAG.getConstant(Stride, BasePTR.getValueType())); 509249423Sdim 510249423Sdim Vals.push_back(ScalarLoad.getValue(0)); 511249423Sdim LoadChains.push_back(ScalarLoad.getValue(1)); 512249423Sdim } 513234353Sdim } 514234353Sdim 515234353Sdim SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 516234353Sdim &LoadChains[0], LoadChains.size()); 517234353Sdim SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, 518249423Sdim 
Op.getNode()->getValueType(0), &Vals[0], Vals.size()); 519234353Sdim 520234353Sdim AddLegalizedOperand(Op.getValue(0), Value); 521234353Sdim AddLegalizedOperand(Op.getValue(1), NewChain); 522234353Sdim 523234353Sdim return (Op.getResNo() ? NewChain : Value); 524234353Sdim} 525234353Sdim 526234353SdimSDValue VectorLegalizer::ExpandStore(SDValue Op) { 527263508Sdim SDLoc dl(Op); 528234353Sdim StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); 529234353Sdim SDValue Chain = ST->getChain(); 530234353Sdim SDValue BasePTR = ST->getBasePtr(); 531234353Sdim SDValue Value = ST->getValue(); 532234353Sdim EVT StVT = ST->getMemoryVT(); 533234353Sdim 534234353Sdim unsigned Alignment = ST->getAlignment(); 535234353Sdim bool isVolatile = ST->isVolatile(); 536234353Sdim bool isNonTemporal = ST->isNonTemporal(); 537263508Sdim const MDNode *TBAAInfo = ST->getTBAAInfo(); 538234353Sdim 539234353Sdim unsigned NumElem = StVT.getVectorNumElements(); 540234353Sdim // The type of the data we want to save 541234353Sdim EVT RegVT = Value.getValueType(); 542234353Sdim EVT RegSclVT = RegVT.getScalarType(); 543234353Sdim // The type of data as saved in memory. 544234353Sdim EVT MemSclVT = StVT.getScalarType(); 545234353Sdim 546234353Sdim // Cast floats into integers 547234353Sdim unsigned ScalarSize = MemSclVT.getSizeInBits(); 548234353Sdim 549234353Sdim // Round odd types to the next pow of two. 550234353Sdim if (!isPowerOf2_32(ScalarSize)) 551234353Sdim ScalarSize = NextPowerOf2(ScalarSize); 552234353Sdim 553234353Sdim // Store Stride in bytes 554234353Sdim unsigned Stride = ScalarSize/8; 555234353Sdim // Extract each of the elements from the original vector 556234353Sdim // and save them into memory individually. 
557234353Sdim SmallVector<SDValue, 8> Stores; 558234353Sdim for (unsigned Idx = 0; Idx < NumElem; Idx++) { 559234353Sdim SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 560263508Sdim RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); 561234353Sdim 562234353Sdim // This scalar TruncStore may be illegal, but we legalize it later. 563234353Sdim SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, 564234353Sdim ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, 565263508Sdim isVolatile, isNonTemporal, Alignment, TBAAInfo); 566234353Sdim 567234353Sdim BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, 568263508Sdim DAG.getConstant(Stride, BasePTR.getValueType())); 569234353Sdim 570234353Sdim Stores.push_back(Store); 571234353Sdim } 572234353Sdim SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 573234353Sdim &Stores[0], Stores.size()); 574234353Sdim AddLegalizedOperand(Op, TF); 575234353Sdim return TF; 576234353Sdim} 577234353Sdim 578243830SdimSDValue VectorLegalizer::ExpandSELECT(SDValue Op) { 579243830Sdim // Lower a select instruction where the condition is a scalar and the 580243830Sdim // operands are vectors. Lower this select to VSELECT and implement it 581263508Sdim // using XOR AND OR. The selector bit is broadcasted. 582243830Sdim EVT VT = Op.getValueType(); 583263508Sdim SDLoc DL(Op); 584243830Sdim 585243830Sdim SDValue Mask = Op.getOperand(0); 586243830Sdim SDValue Op1 = Op.getOperand(1); 587243830Sdim SDValue Op2 = Op.getOperand(2); 588243830Sdim 589243830Sdim assert(VT.isVector() && !Mask.getValueType().isVector() 590243830Sdim && Op1.getValueType() == Op2.getValueType() && "Invalid type"); 591243830Sdim 592243830Sdim unsigned NumElem = VT.getVectorNumElements(); 593243830Sdim 594243830Sdim // If we can't even use the basic vector operations of 595243830Sdim // AND,OR,XOR, we will have to scalarize the op. 
596243830Sdim // Notice that the operation may be 'promoted' which means that it is 597243830Sdim // 'bitcasted' to another type which is handled. 598243830Sdim // Also, we need to be able to construct a splat vector using BUILD_VECTOR. 599243830Sdim if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 600243830Sdim TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 601243830Sdim TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 602243830Sdim TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) 603243830Sdim return DAG.UnrollVectorOp(Op.getNode()); 604243830Sdim 605243830Sdim // Generate a mask operand. 606263508Sdim EVT MaskTy = VT.changeVectorElementTypeToInteger(); 607243830Sdim 608243830Sdim // What is the size of each element in the vector mask. 609243830Sdim EVT BitTy = MaskTy.getScalarType(); 610243830Sdim 611263508Sdim Mask = DAG.getSelect(DL, BitTy, Mask, 612243830Sdim DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), 613243830Sdim DAG.getConstant(0, BitTy)); 614243830Sdim 615243830Sdim // Broadcast the mask so that the entire vector is all-one or all zero. 616243830Sdim SmallVector<SDValue, 8> Ops(NumElem, Mask); 617243830Sdim Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); 618243830Sdim 619243830Sdim // Bitcast the operands to be the same type as the mask. 620243830Sdim // This is needed when we select between FP types because 621243830Sdim // the mask is a vector of integers. 
622243830Sdim Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); 623243830Sdim Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); 624243830Sdim 625243830Sdim SDValue AllOnes = DAG.getConstant( 626243830Sdim APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); 627243830Sdim SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); 628243830Sdim 629243830Sdim Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); 630243830Sdim Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); 631243830Sdim SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); 632243830Sdim return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); 633243830Sdim} 634243830Sdim 635249423SdimSDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { 636249423Sdim EVT VT = Op.getValueType(); 637249423Sdim 638249423Sdim // Make sure that the SRA and SHL instructions are available. 639249423Sdim if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || 640249423Sdim TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) 641249423Sdim return DAG.UnrollVectorOp(Op.getNode()); 642249423Sdim 643263508Sdim SDLoc DL(Op); 644249423Sdim EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); 645249423Sdim 646249423Sdim unsigned BW = VT.getScalarType().getSizeInBits(); 647249423Sdim unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); 648249423Sdim SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); 649249423Sdim 650249423Sdim Op = Op.getOperand(0); 651249423Sdim Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); 652249423Sdim return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); 653249423Sdim} 654249423Sdim 655226633SdimSDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { 656226633Sdim // Implement VSELECT in terms of XOR, AND, OR 657226633Sdim // on platforms which do not support blend natively. 
658263508Sdim SDLoc DL(Op); 659221345Sdim 660226633Sdim SDValue Mask = Op.getOperand(0); 661226633Sdim SDValue Op1 = Op.getOperand(1); 662226633Sdim SDValue Op2 = Op.getOperand(2); 663221345Sdim 664263508Sdim EVT VT = Mask.getValueType(); 665263508Sdim 666226633Sdim // If we can't even use the basic vector operations of 667226633Sdim // AND,OR,XOR, we will have to scalarize the op. 668234353Sdim // Notice that the operation may be 'promoted' which means that it is 669234353Sdim // 'bitcasted' to another type which is handled. 670243830Sdim // This operation also isn't safe with AND, OR, XOR when the boolean 671243830Sdim // type is 0/1 as we need an all ones vector constant to mask with. 672243830Sdim // FIXME: Sign extend 1 to all ones if thats legal on the target. 673234353Sdim if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 674234353Sdim TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 675243830Sdim TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 676243830Sdim TLI.getBooleanContents(true) != 677243830Sdim TargetLowering::ZeroOrNegativeOneBooleanContent) 678234353Sdim return DAG.UnrollVectorOp(Op.getNode()); 679226633Sdim 680263508Sdim // If the mask and the type are different sizes, unroll the vector op. This 681263508Sdim // can occur when getSetCCResultType returns something that is different in 682263508Sdim // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. 683263508Sdim if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) 684263508Sdim return DAG.UnrollVectorOp(Op.getNode()); 685263508Sdim 686226633Sdim // Bitcast the operands to be the same type as the mask. 687226633Sdim // This is needed when we select between FP types because 688226633Sdim // the mask is a vector of integers. 
689226633Sdim Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); 690226633Sdim Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); 691226633Sdim 692226633Sdim SDValue AllOnes = DAG.getConstant( 693226633Sdim APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); 694226633Sdim SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); 695226633Sdim 696226633Sdim Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); 697226633Sdim Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); 698234982Sdim SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); 699234982Sdim return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); 700226633Sdim} 701226633Sdim 702226633SdimSDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { 703221345Sdim EVT VT = Op.getOperand(0).getValueType(); 704263508Sdim SDLoc DL(Op); 705221345Sdim 706221345Sdim // Make sure that the SINT_TO_FP and SRL instructions are available. 707234353Sdim if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || 708234353Sdim TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) 709234353Sdim return DAG.UnrollVectorOp(Op.getNode()); 710221345Sdim 711221345Sdim EVT SVT = VT.getScalarType(); 712221345Sdim assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && 713221345Sdim "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); 714221345Sdim 715221345Sdim unsigned BW = SVT.getSizeInBits(); 716221345Sdim SDValue HalfWord = DAG.getConstant(BW/2, VT); 717221345Sdim 718221345Sdim // Constants to clear the upper part of the word. 719221345Sdim // Notice that we can also use SHL+SHR, but using a constant is slightly 720221345Sdim // faster on x86. 721221345Sdim uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; 722221345Sdim SDValue HalfWordMask = DAG.getConstant(HWMask, VT); 723221345Sdim 724221345Sdim // Two to the power of half-word-size. 
725221345Sdim SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); 726221345Sdim 727221345Sdim // Clear upper part of LO, lower HI 728221345Sdim SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); 729221345Sdim SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); 730221345Sdim 731221345Sdim // Convert hi and lo to floats 732221345Sdim // Convert the hi part back to the upper values 733221345Sdim SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); 734221345Sdim fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); 735221345Sdim SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); 736221345Sdim 737221345Sdim // Add the two halves 738221345Sdim return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); 739221345Sdim} 740221345Sdim 741221345Sdim 742193323SedSDValue VectorLegalizer::ExpandFNEG(SDValue Op) { 743193323Sed if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { 744193323Sed SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); 745263508Sdim return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 746193323Sed Zero, Op.getOperand(0)); 747193323Sed } 748199989Srdivacky return DAG.UnrollVectorOp(Op.getNode()); 749193323Sed} 750193323Sed 751193323SedSDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { 752198090Srdivacky EVT VT = Op.getValueType(); 753193323Sed unsigned NumElems = VT.getVectorNumElements(); 754198090Srdivacky EVT EltVT = VT.getVectorElementType(); 755193323Sed SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); 756198090Srdivacky EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 757263508Sdim SDLoc dl(Op); 758193323Sed SmallVector<SDValue, 8> Ops(NumElems); 759193323Sed for (unsigned i = 0; i < NumElems; ++i) { 760193323Sed SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 761263508Sdim DAG.getConstant(i, TLI.getVectorIdxTy())); 762193323Sed SDValue RHSElem = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 763263508Sdim DAG.getConstant(i, TLI.getVectorIdxTy())); 764263508Sdim Ops[i] = DAG.getNode(ISD::SETCC, dl, 765263508Sdim TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), 766193323Sed LHSElem, RHSElem, CC); 767263508Sdim Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], 768263508Sdim DAG.getConstant(APInt::getAllOnesValue 769263508Sdim (EltVT.getSizeInBits()), EltVT), 770263508Sdim DAG.getConstant(0, EltVT)); 771193323Sed } 772193323Sed return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); 773193323Sed} 774193323Sed 775193323Sed} 776193323Sed 777193323Sedbool SelectionDAG::LegalizeVectors() { 778193323Sed return VectorLegalizer(*this).Run(); 779193323Sed} 780