1193323Sed//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements the SelectionDAG::LegalizeVectors method.
11193323Sed//
12193323Sed// The vector legalizer looks for vector operations which might need to be
13193323Sed// scalarized and legalizes them. This is a separate step from Legalize because
14193323Sed// scalarizing can introduce illegal types.  For example, suppose we have an
15193323Sed// ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
16193323Sed// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17193323Sed// operation, which introduces nodes with the illegal type i64 which must be
18193323Sed// expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19193323Sed// the operation must be unrolled, which introduces nodes with the illegal
20193323Sed// type i8 which must be promoted.
21193323Sed//
22193323Sed// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23198090Srdivacky// or operations that happen to take a vector which are custom-lowered;
24198090Srdivacky// the legalization for such operations never produces nodes
25193323Sed// with illegal types, so it's okay to put off legalizing them until
26193323Sed// SelectionDAG::Legalize runs.
27193323Sed//
28193323Sed//===----------------------------------------------------------------------===//
29193323Sed
30193323Sed#include "llvm/CodeGen/SelectionDAG.h"
31193323Sed#include "llvm/Target/TargetLowering.h"
32193323Sedusing namespace llvm;
33193323Sed
34193323Sednamespace {
35193323Sedclass VectorLegalizer {
36193323Sed  SelectionDAG& DAG;
37207618Srdivacky  const TargetLowering &TLI;
38193323Sed  bool Changed; // Keep track of whether anything changed
39193323Sed
40193323Sed  /// LegalizedNodes - For nodes that are of legal width, and that have more
41193323Sed  /// than one use, this map indicates what regularized operand to use.  This
42193323Sed  /// allows us to avoid legalizing the same thing more than once.
43249423Sdim  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
44193323Sed
45193323Sed  // Adds a node to the translation cache
46193323Sed  void AddLegalizedOperand(SDValue From, SDValue To) {
47193323Sed    LegalizedNodes.insert(std::make_pair(From, To));
48193323Sed    // If someone requests legalization of the new node, return itself.
49193323Sed    if (From != To)
50193323Sed      LegalizedNodes.insert(std::make_pair(To, To));
51193323Sed  }
52193323Sed
53193323Sed  // Legalizes the given node
54193323Sed  SDValue LegalizeOp(SDValue Op);
55193323Sed  // Assuming the node is legal, "legalize" the results
56193323Sed  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
57193323Sed  // Implements unrolling a VSETCC.
58193323Sed  SDValue UnrollVSETCC(SDValue Op);
59193323Sed  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
60193323Sed  // isn't legal.
61221345Sdim  // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
62221345Sdim  // SINT_TO_FLOAT and SHR on vectors isn't legal.
63221345Sdim  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
64249423Sdim  // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
65249423Sdim  SDValue ExpandSEXTINREG(SDValue Op);
66226633Sdim  // Implement vselect in terms of XOR, AND, OR when blend is not supported
67226633Sdim  // by the target.
68226633Sdim  SDValue ExpandVSELECT(SDValue Op);
69243830Sdim  SDValue ExpandSELECT(SDValue Op);
70234353Sdim  SDValue ExpandLoad(SDValue Op);
71234353Sdim  SDValue ExpandStore(SDValue Op);
72193323Sed  SDValue ExpandFNEG(SDValue Op);
73193323Sed  // Implements vector promotion; this is essentially just bitcasting the
74193323Sed  // operands to a different type and bitcasting the result back to the
75193323Sed  // original type.
76193323Sed  SDValue PromoteVectorOp(SDValue Op);
77239462Sdim  // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
78239462Sdim  // operand to the next size up.
79239462Sdim  SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
80193323Sed
81193323Sed  public:
82193323Sed  bool Run();
83193323Sed  VectorLegalizer(SelectionDAG& dag) :
84193323Sed      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
85193323Sed};
86193323Sed
87193323Sedbool VectorLegalizer::Run() {
88249423Sdim  // Before we start legalizing vector nodes, check if there are any vectors.
89249423Sdim  bool HasVectors = false;
90249423Sdim  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
91249423Sdim       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
92249423Sdim    // Check if the values of the nodes contain vectors. We don't need to check
93249423Sdim    // the operands because we are going to check their values at some point.
94249423Sdim    for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
95249423Sdim         J != E; ++J)
96249423Sdim      HasVectors |= J->isVector();
97249423Sdim
98249423Sdim    // If we found a vector node we can start the legalization.
99249423Sdim    if (HasVectors)
100249423Sdim      break;
101249423Sdim  }
102249423Sdim
103249423Sdim  // If this basic block has no vectors then no need to legalize vectors.
104249423Sdim  if (!HasVectors)
105249423Sdim    return false;
106249423Sdim
107193323Sed  // The legalize process is inherently a bottom-up recursive process (users
108193323Sed  // legalize their uses before themselves).  Given infinite stack space, we
109193323Sed  // could just start legalizing on the root and traverse the whole graph.  In
110193323Sed  // practice however, this causes us to run out of stack space on large basic
111193323Sed  // blocks.  To avoid this problem, compute an ordering of the nodes where each
112193323Sed  // node is only legalized after all of its operands are legalized.
113193323Sed  DAG.AssignTopologicalOrder();
114193323Sed  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
115200581Srdivacky       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
116193323Sed    LegalizeOp(SDValue(I, 0));
117193323Sed
118193323Sed  // Finally, it's possible the root changed.  Get the new root.
119193323Sed  SDValue OldRoot = DAG.getRoot();
120193323Sed  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
121193323Sed  DAG.setRoot(LegalizedNodes[OldRoot]);
122193323Sed
123193323Sed  LegalizedNodes.clear();
124193323Sed
125193323Sed  // Remove dead nodes now.
126193323Sed  DAG.RemoveDeadNodes();
127193323Sed
128193323Sed  return Changed;
129193323Sed}
130193323Sed
131193323SedSDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
132193323Sed  // Generic legalization: just pass the operand through.
133193323Sed  for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
134193323Sed    AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
135193323Sed  return Result.getValue(Op.getResNo());
136193323Sed}
137193323Sed
138193323SedSDValue VectorLegalizer::LegalizeOp(SDValue Op) {
139193323Sed  // Note that LegalizeOp may be reentered even from single-use nodes, which
140193323Sed  // means that we always must cache transformed nodes.
141193323Sed  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
142193323Sed  if (I != LegalizedNodes.end()) return I->second;
143193323Sed
144193323Sed  SDNode* Node = Op.getNode();
145193323Sed
146193323Sed  // Legalize the operands
147193323Sed  SmallVector<SDValue, 8> Ops;
148193323Sed  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
149193323Sed    Ops.push_back(LegalizeOp(Node->getOperand(i)));
150193323Sed
151193323Sed  SDValue Result =
152210299Sed    SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
153193323Sed
154234353Sdim  if (Op.getOpcode() == ISD::LOAD) {
155234353Sdim    LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
156234353Sdim    ISD::LoadExtType ExtType = LD->getExtensionType();
157234353Sdim    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
158234353Sdim      if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
159234353Sdim        return TranslateLegalizeResults(Op, Result);
160234353Sdim      Changed = true;
161234353Sdim      return LegalizeOp(ExpandLoad(Op));
162234353Sdim    }
163234353Sdim  } else if (Op.getOpcode() == ISD::STORE) {
164234353Sdim    StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
165234353Sdim    EVT StVT = ST->getMemoryVT();
166249423Sdim    MVT ValVT = ST->getValue().getSimpleValueType();
167234353Sdim    if (StVT.isVector() && ST->isTruncatingStore())
168249423Sdim      switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
169234353Sdim      default: llvm_unreachable("This action is not supported yet!");
170234353Sdim      case TargetLowering::Legal:
171234353Sdim        return TranslateLegalizeResults(Op, Result);
172234353Sdim      case TargetLowering::Custom:
173234353Sdim        Changed = true;
174263508Sdim        return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
175234353Sdim      case TargetLowering::Expand:
176234353Sdim        Changed = true;
177234353Sdim        return LegalizeOp(ExpandStore(Op));
178234353Sdim      }
179234353Sdim  }
180234353Sdim
181193323Sed  bool HasVectorValue = false;
182193323Sed  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
183193323Sed       J != E;
184193323Sed       ++J)
185193323Sed    HasVectorValue |= J->isVector();
186193323Sed  if (!HasVectorValue)
187193323Sed    return TranslateLegalizeResults(Op, Result);
188193323Sed
189198090Srdivacky  EVT QueryType;
190193323Sed  switch (Op.getOpcode()) {
191193323Sed  default:
192193323Sed    return TranslateLegalizeResults(Op, Result);
193193323Sed  case ISD::ADD:
194193323Sed  case ISD::SUB:
195193323Sed  case ISD::MUL:
196193323Sed  case ISD::SDIV:
197193323Sed  case ISD::UDIV:
198193323Sed  case ISD::SREM:
199193323Sed  case ISD::UREM:
200193323Sed  case ISD::FADD:
201193323Sed  case ISD::FSUB:
202193323Sed  case ISD::FMUL:
203193323Sed  case ISD::FDIV:
204193323Sed  case ISD::FREM:
205193323Sed  case ISD::AND:
206193323Sed  case ISD::OR:
207193323Sed  case ISD::XOR:
208193323Sed  case ISD::SHL:
209193323Sed  case ISD::SRA:
210193323Sed  case ISD::SRL:
211193323Sed  case ISD::ROTL:
212193323Sed  case ISD::ROTR:
213266715Sdim  case ISD::BSWAP:
214234353Sdim  case ISD::CTLZ:
215193323Sed  case ISD::CTTZ:
216234353Sdim  case ISD::CTLZ_ZERO_UNDEF:
217234353Sdim  case ISD::CTTZ_ZERO_UNDEF:
218193323Sed  case ISD::CTPOP:
219193323Sed  case ISD::SELECT:
220226633Sdim  case ISD::VSELECT:
221193323Sed  case ISD::SELECT_CC:
222226633Sdim  case ISD::SETCC:
223193323Sed  case ISD::ZERO_EXTEND:
224193323Sed  case ISD::ANY_EXTEND:
225193323Sed  case ISD::TRUNCATE:
226193323Sed  case ISD::SIGN_EXTEND:
227193323Sed  case ISD::FP_TO_SINT:
228193323Sed  case ISD::FP_TO_UINT:
229193323Sed  case ISD::FNEG:
230193323Sed  case ISD::FABS:
231263508Sdim  case ISD::FCOPYSIGN:
232193323Sed  case ISD::FSQRT:
233193323Sed  case ISD::FSIN:
234193323Sed  case ISD::FCOS:
235193323Sed  case ISD::FPOWI:
236193323Sed  case ISD::FPOW:
237193323Sed  case ISD::FLOG:
238193323Sed  case ISD::FLOG2:
239193323Sed  case ISD::FLOG10:
240193323Sed  case ISD::FEXP:
241193323Sed  case ISD::FEXP2:
242193323Sed  case ISD::FCEIL:
243193323Sed  case ISD::FTRUNC:
244193323Sed  case ISD::FRINT:
245193323Sed  case ISD::FNEARBYINT:
246263508Sdim  case ISD::FROUND:
247193323Sed  case ISD::FFLOOR:
248249423Sdim  case ISD::FP_ROUND:
249249423Sdim  case ISD::FP_EXTEND:
250243830Sdim  case ISD::FMA:
251224145Sdim  case ISD::SIGN_EXTEND_INREG:
252193574Sed    QueryType = Node->getValueType(0);
253193323Sed    break;
254202375Srdivacky  case ISD::FP_ROUND_INREG:
255202375Srdivacky    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
256202375Srdivacky    break;
257193574Sed  case ISD::SINT_TO_FP:
258193574Sed  case ISD::UINT_TO_FP:
259193574Sed    QueryType = Node->getOperand(0).getValueType();
260193574Sed    break;
261193323Sed  }
262193323Sed
263193574Sed  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
264193323Sed  case TargetLowering::Promote:
265239462Sdim    switch (Op.getOpcode()) {
266239462Sdim    default:
267239462Sdim      // "Promote" the operation by bitcasting
268239462Sdim      Result = PromoteVectorOp(Op);
269239462Sdim      Changed = true;
270239462Sdim      break;
271239462Sdim    case ISD::SINT_TO_FP:
272239462Sdim    case ISD::UINT_TO_FP:
273239462Sdim      // "Promote" the operation by extending the operand.
274239462Sdim      Result = PromoteVectorOpINT_TO_FP(Op);
275239462Sdim      Changed = true;
276239462Sdim      break;
277239462Sdim    }
278193323Sed    break;
279193323Sed  case TargetLowering::Legal: break;
280193323Sed  case TargetLowering::Custom: {
281193323Sed    SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
282193323Sed    if (Tmp1.getNode()) {
283193323Sed      Result = Tmp1;
284193323Sed      break;
285193323Sed    }
286193323Sed    // FALL THROUGH
287193323Sed  }
288193323Sed  case TargetLowering::Expand:
289249423Sdim    if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
290249423Sdim      Result = ExpandSEXTINREG(Op);
291249423Sdim    else if (Node->getOpcode() == ISD::VSELECT)
292226633Sdim      Result = ExpandVSELECT(Op);
293243830Sdim    else if (Node->getOpcode() == ISD::SELECT)
294243830Sdim      Result = ExpandSELECT(Op);
295226633Sdim    else if (Node->getOpcode() == ISD::UINT_TO_FP)
296221345Sdim      Result = ExpandUINT_TO_FLOAT(Op);
297221345Sdim    else if (Node->getOpcode() == ISD::FNEG)
298193323Sed      Result = ExpandFNEG(Op);
299226633Sdim    else if (Node->getOpcode() == ISD::SETCC)
300193323Sed      Result = UnrollVSETCC(Op);
301193323Sed    else
302199989Srdivacky      Result = DAG.UnrollVectorOp(Op.getNode());
303193323Sed    break;
304193323Sed  }
305193323Sed
306193323Sed  // Make sure that the generated code is itself legal.
307193323Sed  if (Result != Op) {
308193323Sed    Result = LegalizeOp(Result);
309193323Sed    Changed = true;
310193323Sed  }
311193323Sed
312193323Sed  // Note that LegalizeOp may be reentered even from single-use nodes, which
313193323Sed  // means that we always must cache transformed nodes.
314193323Sed  AddLegalizedOperand(Op, Result);
315193323Sed  return Result;
316193323Sed}
317193323Sed
318193323SedSDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
319193323Sed  // Vector "promotion" is basically just bitcasting and doing the operation
320193323Sed  // in a different type.  For example, x86 promotes ISD::AND on v2i32 to
321193323Sed  // v1i64.
322249423Sdim  MVT VT = Op.getSimpleValueType();
323193323Sed  assert(Op.getNode()->getNumValues() == 1 &&
324193323Sed         "Can't promote a vector with multiple results!");
325249423Sdim  MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
326263508Sdim  SDLoc dl(Op);
327193323Sed  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
328193323Sed
329193323Sed  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
330193323Sed    if (Op.getOperand(j).getValueType().isVector())
331218893Sdim      Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
332193323Sed    else
333193323Sed      Operands[j] = Op.getOperand(j);
334193323Sed  }
335193323Sed
336193323Sed  Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
337193323Sed
338218893Sdim  return DAG.getNode(ISD::BITCAST, dl, VT, Op);
339193323Sed}
340193323Sed
341239462SdimSDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
342239462Sdim  // INT_TO_FP operations may require the input operand be promoted even
343239462Sdim  // when the type is otherwise legal.
344239462Sdim  EVT VT = Op.getOperand(0).getValueType();
345239462Sdim  assert(Op.getNode()->getNumValues() == 1 &&
346239462Sdim         "Can't promote a vector with multiple results!");
347234353Sdim
348239462Sdim  // Normal getTypeToPromoteTo() doesn't work here, as that will promote
349239462Sdim  // by widening the vector w/ the same element width and twice the number
350239462Sdim  // of elements. We want the other way around, the same number of elements,
351239462Sdim  // each twice the width.
352239462Sdim  //
353239462Sdim  // Increase the bitwidth of the element to the next pow-of-two
354239462Sdim  // (which is greater than 8 bits).
355239462Sdim  unsigned NumElts = VT.getVectorNumElements();
356239462Sdim  EVT EltVT = VT.getVectorElementType();
357239462Sdim  EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
358239462Sdim  assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
359239462Sdim
360239462Sdim  // Build a new vector type and check if it is legal.
361239462Sdim  MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
362239462Sdim
363263508Sdim  SDLoc dl(Op);
364239462Sdim  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
365239462Sdim
366239462Sdim  unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
367239462Sdim    ISD::SIGN_EXTEND;
368239462Sdim  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
369239462Sdim    if (Op.getOperand(j).getValueType().isVector())
370239462Sdim      Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
371239462Sdim    else
372239462Sdim      Operands[j] = Op.getOperand(j);
373239462Sdim  }
374239462Sdim
375239462Sdim  return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
376239462Sdim                     Operands.size());
377239462Sdim}
378239462Sdim
379239462Sdim
380234353SdimSDValue VectorLegalizer::ExpandLoad(SDValue Op) {
381263508Sdim  SDLoc dl(Op);
382234353Sdim  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
383234353Sdim  SDValue Chain = LD->getChain();
384234353Sdim  SDValue BasePTR = LD->getBasePtr();
385234353Sdim  EVT SrcVT = LD->getMemoryVT();
386234353Sdim  ISD::LoadExtType ExtType = LD->getExtensionType();
387234353Sdim
388249423Sdim  SmallVector<SDValue, 8> Vals;
389234353Sdim  SmallVector<SDValue, 8> LoadChains;
390234353Sdim  unsigned NumElem = SrcVT.getVectorNumElements();
391234353Sdim
392249423Sdim  EVT SrcEltVT = SrcVT.getScalarType();
393249423Sdim  EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
394234353Sdim
395249423Sdim  if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
396249423Sdim    // When elements in a vector is not byte-addressable, we cannot directly
397249423Sdim    // load each element by advancing pointer, which could only address bytes.
398249423Sdim    // Instead, we load all significant words, mask bits off, and concatenate
399249423Sdim    // them to form each element. Finally, they are extended to destination
400249423Sdim    // scalar type to build the destination vector.
401249423Sdim    EVT WideVT = TLI.getPointerTy();
402234353Sdim
403249423Sdim    assert(WideVT.isRound() &&
404249423Sdim           "Could not handle the sophisticated case when the widest integer is"
405249423Sdim           " not power of 2.");
406249423Sdim    assert(WideVT.bitsGE(SrcEltVT) &&
407249423Sdim           "Type is not legalized?");
408249423Sdim
409249423Sdim    unsigned WideBytes = WideVT.getStoreSize();
410249423Sdim    unsigned Offset = 0;
411249423Sdim    unsigned RemainingBytes = SrcVT.getStoreSize();
412249423Sdim    SmallVector<SDValue, 8> LoadVals;
413249423Sdim
414249423Sdim    while (RemainingBytes > 0) {
415249423Sdim      SDValue ScalarLoad;
416249423Sdim      unsigned LoadBytes = WideBytes;
417249423Sdim
418249423Sdim      if (RemainingBytes >= LoadBytes) {
419249423Sdim        ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
420249423Sdim                                 LD->getPointerInfo().getWithOffset(Offset),
421249423Sdim                                 LD->isVolatile(), LD->isNonTemporal(),
422263508Sdim                                 LD->isInvariant(), LD->getAlignment(),
423263508Sdim                                 LD->getTBAAInfo());
424249423Sdim      } else {
425249423Sdim        EVT LoadVT = WideVT;
426249423Sdim        while (RemainingBytes < LoadBytes) {
427249423Sdim          LoadBytes >>= 1; // Reduce the load size by half.
428249423Sdim          LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
429249423Sdim        }
430249423Sdim        ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
431249423Sdim                                    LD->getPointerInfo().getWithOffset(Offset),
432249423Sdim                                    LoadVT, LD->isVolatile(),
433263508Sdim                                    LD->isNonTemporal(), LD->getAlignment(),
434263508Sdim                                    LD->getTBAAInfo());
435249423Sdim      }
436249423Sdim
437249423Sdim      RemainingBytes -= LoadBytes;
438249423Sdim      Offset += LoadBytes;
439249423Sdim      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
440263508Sdim                            DAG.getConstant(LoadBytes, BasePTR.getValueType()));
441249423Sdim
442249423Sdim      LoadVals.push_back(ScalarLoad.getValue(0));
443249423Sdim      LoadChains.push_back(ScalarLoad.getValue(1));
444249423Sdim    }
445249423Sdim
446249423Sdim    // Extract bits, pack and extend/trunc them into destination type.
447249423Sdim    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
448249423Sdim    SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
449249423Sdim
450249423Sdim    unsigned BitOffset = 0;
451249423Sdim    unsigned WideIdx = 0;
452249423Sdim    unsigned WideBits = WideVT.getSizeInBits();
453249423Sdim
454249423Sdim    for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
455249423Sdim      SDValue Lo, Hi, ShAmt;
456249423Sdim
457249423Sdim      if (BitOffset < WideBits) {
458249423Sdim        ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
459249423Sdim        Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
460249423Sdim        Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
461249423Sdim      }
462249423Sdim
463249423Sdim      BitOffset += SrcEltBits;
464249423Sdim      if (BitOffset >= WideBits) {
465249423Sdim        WideIdx++;
466249423Sdim        Offset -= WideBits;
467249423Sdim        if (Offset > 0) {
468249423Sdim          ShAmt = DAG.getConstant(SrcEltBits - Offset,
469249423Sdim                                  TLI.getShiftAmountTy(WideVT));
470249423Sdim          Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
471249423Sdim          Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
472249423Sdim        }
473249423Sdim      }
474249423Sdim
475249423Sdim      if (Hi.getNode())
476249423Sdim        Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
477249423Sdim
478249423Sdim      switch (ExtType) {
479249423Sdim      default: llvm_unreachable("Unknown extended-load op!");
480249423Sdim      case ISD::EXTLOAD:
481249423Sdim        Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
482249423Sdim        break;
483249423Sdim      case ISD::ZEXTLOAD:
484249423Sdim        Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
485249423Sdim        break;
486249423Sdim      case ISD::SEXTLOAD:
487249423Sdim        ShAmt = DAG.getConstant(WideBits - SrcEltBits,
488249423Sdim                                TLI.getShiftAmountTy(WideVT));
489249423Sdim        Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
490249423Sdim        Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
491249423Sdim        Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
492249423Sdim        break;
493249423Sdim      }
494249423Sdim      Vals.push_back(Lo);
495249423Sdim    }
496249423Sdim  } else {
497249423Sdim    unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
498249423Sdim
499249423Sdim    for (unsigned Idx=0; Idx<NumElem; Idx++) {
500249423Sdim      SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
501249423Sdim                Op.getNode()->getValueType(0).getScalarType(),
502249423Sdim                Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
503249423Sdim                SrcVT.getScalarType(),
504249423Sdim                LD->isVolatile(), LD->isNonTemporal(),
505263508Sdim                LD->getAlignment(), LD->getTBAAInfo());
506249423Sdim
507249423Sdim      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
508263508Sdim                         DAG.getConstant(Stride, BasePTR.getValueType()));
509249423Sdim
510249423Sdim      Vals.push_back(ScalarLoad.getValue(0));
511249423Sdim      LoadChains.push_back(ScalarLoad.getValue(1));
512249423Sdim    }
513234353Sdim  }
514234353Sdim
515234353Sdim  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
516234353Sdim            &LoadChains[0], LoadChains.size());
517234353Sdim  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
518249423Sdim            Op.getNode()->getValueType(0), &Vals[0], Vals.size());
519234353Sdim
520234353Sdim  AddLegalizedOperand(Op.getValue(0), Value);
521234353Sdim  AddLegalizedOperand(Op.getValue(1), NewChain);
522234353Sdim
523234353Sdim  return (Op.getResNo() ? NewChain : Value);
524234353Sdim}
525234353Sdim
526234353SdimSDValue VectorLegalizer::ExpandStore(SDValue Op) {
527263508Sdim  SDLoc dl(Op);
528234353Sdim  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
529234353Sdim  SDValue Chain = ST->getChain();
530234353Sdim  SDValue BasePTR = ST->getBasePtr();
531234353Sdim  SDValue Value = ST->getValue();
532234353Sdim  EVT StVT = ST->getMemoryVT();
533234353Sdim
534234353Sdim  unsigned Alignment = ST->getAlignment();
535234353Sdim  bool isVolatile = ST->isVolatile();
536234353Sdim  bool isNonTemporal = ST->isNonTemporal();
537263508Sdim  const MDNode *TBAAInfo = ST->getTBAAInfo();
538234353Sdim
539234353Sdim  unsigned NumElem = StVT.getVectorNumElements();
540234353Sdim  // The type of the data we want to save
541234353Sdim  EVT RegVT = Value.getValueType();
542234353Sdim  EVT RegSclVT = RegVT.getScalarType();
543234353Sdim  // The type of data as saved in memory.
544234353Sdim  EVT MemSclVT = StVT.getScalarType();
545234353Sdim
546234353Sdim  // Cast floats into integers
547234353Sdim  unsigned ScalarSize = MemSclVT.getSizeInBits();
548234353Sdim
549234353Sdim  // Round odd types to the next pow of two.
550234353Sdim  if (!isPowerOf2_32(ScalarSize))
551234353Sdim    ScalarSize = NextPowerOf2(ScalarSize);
552234353Sdim
553234353Sdim  // Store Stride in bytes
554234353Sdim  unsigned Stride = ScalarSize/8;
555234353Sdim  // Extract each of the elements from the original vector
556234353Sdim  // and save them into memory individually.
557234353Sdim  SmallVector<SDValue, 8> Stores;
558234353Sdim  for (unsigned Idx = 0; Idx < NumElem; Idx++) {
559234353Sdim    SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
560263508Sdim               RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy()));
561234353Sdim
562234353Sdim    // This scalar TruncStore may be illegal, but we legalize it later.
563234353Sdim    SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
564234353Sdim               ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
565263508Sdim               isVolatile, isNonTemporal, Alignment, TBAAInfo);
566234353Sdim
567234353Sdim    BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
568263508Sdim                               DAG.getConstant(Stride, BasePTR.getValueType()));
569234353Sdim
570234353Sdim    Stores.push_back(Store);
571234353Sdim  }
572234353Sdim  SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
573234353Sdim                            &Stores[0], Stores.size());
574234353Sdim  AddLegalizedOperand(Op, TF);
575234353Sdim  return TF;
576234353Sdim}
577234353Sdim
578243830SdimSDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
579243830Sdim  // Lower a select instruction where the condition is a scalar and the
580243830Sdim  // operands are vectors. Lower this select to VSELECT and implement it
581263508Sdim  // using XOR AND OR. The selector bit is broadcasted.
582243830Sdim  EVT VT = Op.getValueType();
583263508Sdim  SDLoc DL(Op);
584243830Sdim
585243830Sdim  SDValue Mask = Op.getOperand(0);
586243830Sdim  SDValue Op1 = Op.getOperand(1);
587243830Sdim  SDValue Op2 = Op.getOperand(2);
588243830Sdim
589243830Sdim  assert(VT.isVector() && !Mask.getValueType().isVector()
590243830Sdim         && Op1.getValueType() == Op2.getValueType() && "Invalid type");
591243830Sdim
592243830Sdim  unsigned NumElem = VT.getVectorNumElements();
593243830Sdim
594243830Sdim  // If we can't even use the basic vector operations of
595243830Sdim  // AND,OR,XOR, we will have to scalarize the op.
596243830Sdim  // Notice that the operation may be 'promoted' which means that it is
597243830Sdim  // 'bitcasted' to another type which is handled.
598243830Sdim  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
599243830Sdim  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
600243830Sdim      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
601243830Sdim      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
602243830Sdim      TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
603243830Sdim    return DAG.UnrollVectorOp(Op.getNode());
604243830Sdim
605243830Sdim  // Generate a mask operand.
606263508Sdim  EVT MaskTy = VT.changeVectorElementTypeToInteger();
607243830Sdim
608243830Sdim  // What is the size of each element in the vector mask.
609243830Sdim  EVT BitTy = MaskTy.getScalarType();
610243830Sdim
611263508Sdim  Mask = DAG.getSelect(DL, BitTy, Mask,
612243830Sdim          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
613243830Sdim          DAG.getConstant(0, BitTy));
614243830Sdim
615243830Sdim  // Broadcast the mask so that the entire vector is all-one or all zero.
616243830Sdim  SmallVector<SDValue, 8> Ops(NumElem, Mask);
617243830Sdim  Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
618243830Sdim
619243830Sdim  // Bitcast the operands to be the same type as the mask.
620243830Sdim  // This is needed when we select between FP types because
621243830Sdim  // the mask is a vector of integers.
622243830Sdim  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
623243830Sdim  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
624243830Sdim
625243830Sdim  SDValue AllOnes = DAG.getConstant(
626243830Sdim            APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
627243830Sdim  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
628243830Sdim
629243830Sdim  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
630243830Sdim  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
631243830Sdim  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
632243830Sdim  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
633243830Sdim}
634243830Sdim
635249423SdimSDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
636249423Sdim  EVT VT = Op.getValueType();
637249423Sdim
638249423Sdim  // Make sure that the SRA and SHL instructions are available.
639249423Sdim  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
640249423Sdim      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
641249423Sdim    return DAG.UnrollVectorOp(Op.getNode());
642249423Sdim
643263508Sdim  SDLoc DL(Op);
644249423Sdim  EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
645249423Sdim
646249423Sdim  unsigned BW = VT.getScalarType().getSizeInBits();
647249423Sdim  unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
648249423Sdim  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
649249423Sdim
650249423Sdim  Op = Op.getOperand(0);
651249423Sdim  Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
652249423Sdim  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
653249423Sdim}
654249423Sdim
655226633SdimSDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
656226633Sdim  // Implement VSELECT in terms of XOR, AND, OR
657226633Sdim  // on platforms which do not support blend natively.
658263508Sdim  SDLoc DL(Op);
659221345Sdim
660226633Sdim  SDValue Mask = Op.getOperand(0);
661226633Sdim  SDValue Op1 = Op.getOperand(1);
662226633Sdim  SDValue Op2 = Op.getOperand(2);
663221345Sdim
664263508Sdim  EVT VT = Mask.getValueType();
665263508Sdim
666226633Sdim  // If we can't even use the basic vector operations of
667226633Sdim  // AND,OR,XOR, we will have to scalarize the op.
668234353Sdim  // Notice that the operation may be 'promoted' which means that it is
669234353Sdim  // 'bitcasted' to another type which is handled.
670243830Sdim  // This operation also isn't safe with AND, OR, XOR when the boolean
671243830Sdim  // type is 0/1 as we need an all ones vector constant to mask with.
672243830Sdim  // FIXME: Sign extend 1 to all ones if thats legal on the target.
673234353Sdim  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
674234353Sdim      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
675243830Sdim      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
676243830Sdim      TLI.getBooleanContents(true) !=
677243830Sdim      TargetLowering::ZeroOrNegativeOneBooleanContent)
678234353Sdim    return DAG.UnrollVectorOp(Op.getNode());
679226633Sdim
680263508Sdim  // If the mask and the type are different sizes, unroll the vector op. This
681263508Sdim  // can occur when getSetCCResultType returns something that is different in
682263508Sdim  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
683263508Sdim  if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
684263508Sdim    return DAG.UnrollVectorOp(Op.getNode());
685263508Sdim
686226633Sdim  // Bitcast the operands to be the same type as the mask.
687226633Sdim  // This is needed when we select between FP types because
688226633Sdim  // the mask is a vector of integers.
689226633Sdim  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
690226633Sdim  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
691226633Sdim
692226633Sdim  SDValue AllOnes = DAG.getConstant(
693226633Sdim    APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
694226633Sdim  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
695226633Sdim
696226633Sdim  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
697226633Sdim  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
698234982Sdim  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
699234982Sdim  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
700226633Sdim}
701226633Sdim
702226633SdimSDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
703221345Sdim  EVT VT = Op.getOperand(0).getValueType();
704263508Sdim  SDLoc DL(Op);
705221345Sdim
706221345Sdim  // Make sure that the SINT_TO_FP and SRL instructions are available.
707234353Sdim  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
708234353Sdim      TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
709234353Sdim    return DAG.UnrollVectorOp(Op.getNode());
710221345Sdim
711221345Sdim EVT SVT = VT.getScalarType();
712221345Sdim  assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
713221345Sdim      "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
714221345Sdim
715221345Sdim  unsigned BW = SVT.getSizeInBits();
716221345Sdim  SDValue HalfWord = DAG.getConstant(BW/2, VT);
717221345Sdim
718221345Sdim  // Constants to clear the upper part of the word.
719221345Sdim  // Notice that we can also use SHL+SHR, but using a constant is slightly
720221345Sdim  // faster on x86.
721221345Sdim  uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
722221345Sdim  SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
723221345Sdim
724221345Sdim  // Two to the power of half-word-size.
725221345Sdim  SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
726221345Sdim
727221345Sdim  // Clear upper part of LO, lower HI
728221345Sdim  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
729221345Sdim  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
730221345Sdim
731221345Sdim  // Convert hi and lo to floats
732221345Sdim  // Convert the hi part back to the upper values
733221345Sdim  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
734221345Sdim          fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
735221345Sdim  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
736221345Sdim
737221345Sdim  // Add the two halves
738221345Sdim  return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
739221345Sdim}
740221345Sdim
741221345Sdim
742193323SedSDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
743193323Sed  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
744193323Sed    SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
745263508Sdim    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
746193323Sed                       Zero, Op.getOperand(0));
747193323Sed  }
748199989Srdivacky  return DAG.UnrollVectorOp(Op.getNode());
749193323Sed}
750193323Sed
751193323SedSDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
752198090Srdivacky  EVT VT = Op.getValueType();
753193323Sed  unsigned NumElems = VT.getVectorNumElements();
754198090Srdivacky  EVT EltVT = VT.getVectorElementType();
755193323Sed  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
756198090Srdivacky  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
757263508Sdim  SDLoc dl(Op);
758193323Sed  SmallVector<SDValue, 8> Ops(NumElems);
759193323Sed  for (unsigned i = 0; i < NumElems; ++i) {
760193323Sed    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
761263508Sdim                                  DAG.getConstant(i, TLI.getVectorIdxTy()));
762193323Sed    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
763263508Sdim                                  DAG.getConstant(i, TLI.getVectorIdxTy()));
764263508Sdim    Ops[i] = DAG.getNode(ISD::SETCC, dl,
765263508Sdim                         TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT),
766193323Sed                         LHSElem, RHSElem, CC);
767263508Sdim    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
768263508Sdim                           DAG.getConstant(APInt::getAllOnesValue
769263508Sdim                                           (EltVT.getSizeInBits()), EltVT),
770263508Sdim                           DAG.getConstant(0, EltVT));
771193323Sed  }
772193323Sed  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
773193323Sed}
774193323Sed
775193323Sed}
776193323Sed
777193323Sedbool SelectionDAG::LegalizeVectors() {
778193323Sed  return VectorLegalizer(*this).Run();
779193323Sed}
780