1259698Sdim//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim//
10259698Sdim// This file defines the PowerPC-specific support for the FastISel class. Some
11259698Sdim// of the target-specific code is generated by tablegen in the file
12259698Sdim// PPCGenFastISel.inc, which is #included here.
13259698Sdim//
14259698Sdim//===----------------------------------------------------------------------===//
15259698Sdim
16259698Sdim#define DEBUG_TYPE "ppcfastisel"
17259698Sdim#include "PPC.h"
18259698Sdim#include "PPCISelLowering.h"
19259698Sdim#include "PPCSubtarget.h"
20259698Sdim#include "PPCTargetMachine.h"
21259698Sdim#include "MCTargetDesc/PPCPredicates.h"
22259698Sdim#include "llvm/ADT/Optional.h"
23259698Sdim#include "llvm/CodeGen/CallingConvLower.h"
24259698Sdim#include "llvm/CodeGen/FastISel.h"
25259698Sdim#include "llvm/CodeGen/FunctionLoweringInfo.h"
26259698Sdim#include "llvm/CodeGen/MachineConstantPool.h"
27259698Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
28259698Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
29259698Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
30259698Sdim#include "llvm/IR/CallingConv.h"
31259698Sdim#include "llvm/IR/GlobalAlias.h"
32259698Sdim#include "llvm/IR/GlobalVariable.h"
33259698Sdim#include "llvm/IR/IntrinsicInst.h"
34259698Sdim#include "llvm/IR/Operator.h"
35259698Sdim#include "llvm/Support/Debug.h"
36259698Sdim#include "llvm/Support/GetElementPtrTypeIterator.h"
37259698Sdim#include "llvm/Target/TargetLowering.h"
38259698Sdim#include "llvm/Target/TargetMachine.h"
39259698Sdim
40259698Sdim//===----------------------------------------------------------------------===//
41259698Sdim//
42259698Sdim// TBD:
43259698Sdim//   FastLowerArguments: Handle simple cases.
44259698Sdim//   PPCMaterializeGV: Handle TLS.
45259698Sdim//   SelectCall: Handle function pointers.
46259698Sdim//   SelectCall: Handle multi-register return values.
47259698Sdim//   SelectCall: Optimize away nops for local calls.
48259698Sdim//   processCallArgs: Handle bit-converted arguments.
49259698Sdim//   finishCall: Handle multi-register return values.
50259698Sdim//   PPCComputeAddress: Handle parameter references as FrameIndex's.
51259698Sdim//   PPCEmitCmp: Handle immediate as operand 1.
52259698Sdim//   SelectCall: Handle small byval arguments.
53259698Sdim//   SelectIntrinsicCall: Implement.
54259698Sdim//   SelectSelect: Implement.
55259698Sdim//   Consider factoring isTypeLegal into the base class.
56259698Sdim//   Implement switches and jump tables.
57259698Sdim//
58259698Sdim//===----------------------------------------------------------------------===//
59259698Sdimusing namespace llvm;
60259698Sdim
61259698Sdimnamespace {
62259698Sdim
// Describes the memory operand of a load or store: a base, which is either
// a register or a frame index, plus a constant offset.
struct Address {
  // Tells which member of Base is the meaningful one.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  union {
    unsigned Reg;
    int FI;
  } Base;

  long Offset;

  // A fresh address is register-based, with no register assigned yet
  // (register 0) and a zero offset.
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
82259698Sdim
83259698Sdimclass PPCFastISel : public FastISel {
84259698Sdim
85259698Sdim  const TargetMachine &TM;
86259698Sdim  const TargetInstrInfo &TII;
87259698Sdim  const TargetLowering &TLI;
88259698Sdim  const PPCSubtarget &PPCSubTarget;
89259698Sdim  LLVMContext *Context;
90259698Sdim
91259698Sdim  public:
92259698Sdim    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
93259698Sdim                         const TargetLibraryInfo *LibInfo)
94259698Sdim    : FastISel(FuncInfo, LibInfo),
95259698Sdim      TM(FuncInfo.MF->getTarget()),
96259698Sdim      TII(*TM.getInstrInfo()),
97259698Sdim      TLI(*TM.getTargetLowering()),
98259698Sdim      PPCSubTarget(
99259698Sdim       *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
100259698Sdim      ),
101259698Sdim      Context(&FuncInfo.Fn->getContext()) { }
102259698Sdim
103259698Sdim  // Backend specific FastISel code.
104259698Sdim  private:
105259698Sdim    virtual bool TargetSelectInstruction(const Instruction *I);
106259698Sdim    virtual unsigned TargetMaterializeConstant(const Constant *C);
107259698Sdim    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
108259698Sdim    virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
109259698Sdim                                     const LoadInst *LI);
110259698Sdim    virtual bool FastLowerArguments();
111259698Sdim    virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
112259698Sdim    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
113259698Sdim                                     const TargetRegisterClass *RC,
114259698Sdim                                     unsigned Op0, bool Op0IsKill,
115259698Sdim                                     uint64_t Imm);
116259698Sdim    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
117259698Sdim                                    const TargetRegisterClass *RC,
118259698Sdim                                    unsigned Op0, bool Op0IsKill);
119259698Sdim    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
120259698Sdim                                     const TargetRegisterClass *RC,
121259698Sdim                                     unsigned Op0, bool Op0IsKill,
122259698Sdim                                     unsigned Op1, bool Op1IsKill);
123259698Sdim
124259698Sdim  // Instruction selection routines.
125259698Sdim  private:
126259698Sdim    bool SelectLoad(const Instruction *I);
127259698Sdim    bool SelectStore(const Instruction *I);
128259698Sdim    bool SelectBranch(const Instruction *I);
129259698Sdim    bool SelectIndirectBr(const Instruction *I);
130259698Sdim    bool SelectCmp(const Instruction *I);
131259698Sdim    bool SelectFPExt(const Instruction *I);
132259698Sdim    bool SelectFPTrunc(const Instruction *I);
133259698Sdim    bool SelectIToFP(const Instruction *I, bool IsSigned);
134259698Sdim    bool SelectFPToI(const Instruction *I, bool IsSigned);
135259698Sdim    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
136259698Sdim    bool SelectCall(const Instruction *I);
137259698Sdim    bool SelectRet(const Instruction *I);
138259698Sdim    bool SelectTrunc(const Instruction *I);
139259698Sdim    bool SelectIntExt(const Instruction *I);
140259698Sdim
141259698Sdim  // Utility routines.
142259698Sdim  private:
143259698Sdim    bool isTypeLegal(Type *Ty, MVT &VT);
144259698Sdim    bool isLoadTypeLegal(Type *Ty, MVT &VT);
145259698Sdim    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
146259698Sdim                    bool isZExt, unsigned DestReg);
147259698Sdim    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
148259698Sdim                     const TargetRegisterClass *RC, bool IsZExt = true,
149259698Sdim                     unsigned FP64LoadOpc = PPC::LFD);
150259698Sdim    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
151259698Sdim    bool PPCComputeAddress(const Value *Obj, Address &Addr);
152259698Sdim    void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
153259698Sdim                            unsigned &IndexReg);
154259698Sdim    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
155259698Sdim                           unsigned DestReg, bool IsZExt);
156259698Sdim    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
157259698Sdim    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
158259698Sdim    unsigned PPCMaterializeInt(const Constant *C, MVT VT);
159259698Sdim    unsigned PPCMaterialize32BitInt(int64_t Imm,
160259698Sdim                                    const TargetRegisterClass *RC);
161259698Sdim    unsigned PPCMaterialize64BitInt(int64_t Imm,
162259698Sdim                                    const TargetRegisterClass *RC);
163259698Sdim    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
164259698Sdim                             unsigned SrcReg, bool IsSigned);
165259698Sdim    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
166259698Sdim
167259698Sdim  // Call handling routines.
168259698Sdim  private:
169259698Sdim    bool processCallArgs(SmallVectorImpl<Value*> &Args,
170259698Sdim                         SmallVectorImpl<unsigned> &ArgRegs,
171259698Sdim                         SmallVectorImpl<MVT> &ArgVTs,
172259698Sdim                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
173259698Sdim                         SmallVectorImpl<unsigned> &RegArgs,
174259698Sdim                         CallingConv::ID CC,
175259698Sdim                         unsigned &NumBytes,
176259698Sdim                         bool IsVarArg);
177259698Sdim    void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
178259698Sdim                    const Instruction *I, CallingConv::ID CC,
179259698Sdim                    unsigned &NumBytes, bool IsVarArg);
180259698Sdim    CCAssignFn *usePPC32CCs(unsigned Flag);
181259698Sdim
182259698Sdim  private:
183259698Sdim  #include "PPCGenFastISel.inc"
184259698Sdim
185259698Sdim};
186259698Sdim
187259698Sdim} // end anonymous namespace
188259698Sdim
189259698Sdim#include "PPCGenCallingConv.inc"
190259698Sdim
191259698Sdim// Function whose sole purpose is to kill compiler warnings
192259698Sdim// stemming from unused functions included from PPCGenCallingConv.inc.
193259698SdimCCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
194259698Sdim  if (Flag == 1)
195259698Sdim    return CC_PPC32_SVR4;
196259698Sdim  else if (Flag == 2)
197259698Sdim    return CC_PPC32_SVR4_ByVal;
198259698Sdim  else if (Flag == 3)
199259698Sdim    return CC_PPC32_SVR4_VarArg;
200259698Sdim  else
201259698Sdim    return RetCC_PPC;
202259698Sdim}
203259698Sdim
204259698Sdimstatic Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
205259698Sdim  switch (Pred) {
206259698Sdim    // These are not representable with any single compare.
207259698Sdim    case CmpInst::FCMP_FALSE:
208259698Sdim    case CmpInst::FCMP_UEQ:
209259698Sdim    case CmpInst::FCMP_UGT:
210259698Sdim    case CmpInst::FCMP_UGE:
211259698Sdim    case CmpInst::FCMP_ULT:
212259698Sdim    case CmpInst::FCMP_ULE:
213259698Sdim    case CmpInst::FCMP_UNE:
214259698Sdim    case CmpInst::FCMP_TRUE:
215259698Sdim    default:
216259698Sdim      return Optional<PPC::Predicate>();
217259698Sdim
218259698Sdim    case CmpInst::FCMP_OEQ:
219259698Sdim    case CmpInst::ICMP_EQ:
220259698Sdim      return PPC::PRED_EQ;
221259698Sdim
222259698Sdim    case CmpInst::FCMP_OGT:
223259698Sdim    case CmpInst::ICMP_UGT:
224259698Sdim    case CmpInst::ICMP_SGT:
225259698Sdim      return PPC::PRED_GT;
226259698Sdim
227259698Sdim    case CmpInst::FCMP_OGE:
228259698Sdim    case CmpInst::ICMP_UGE:
229259698Sdim    case CmpInst::ICMP_SGE:
230259698Sdim      return PPC::PRED_GE;
231259698Sdim
232259698Sdim    case CmpInst::FCMP_OLT:
233259698Sdim    case CmpInst::ICMP_ULT:
234259698Sdim    case CmpInst::ICMP_SLT:
235259698Sdim      return PPC::PRED_LT;
236259698Sdim
237259698Sdim    case CmpInst::FCMP_OLE:
238259698Sdim    case CmpInst::ICMP_ULE:
239259698Sdim    case CmpInst::ICMP_SLE:
240259698Sdim      return PPC::PRED_LE;
241259698Sdim
242259698Sdim    case CmpInst::FCMP_ONE:
243259698Sdim    case CmpInst::ICMP_NE:
244259698Sdim      return PPC::PRED_NE;
245259698Sdim
246259698Sdim    case CmpInst::FCMP_ORD:
247259698Sdim      return PPC::PRED_NU;
248259698Sdim
249259698Sdim    case CmpInst::FCMP_UNO:
250259698Sdim      return PPC::PRED_UN;
251259698Sdim  }
252259698Sdim}
253259698Sdim
254259698Sdim// Determine whether the type Ty is simple enough to be handled by
255259698Sdim// fast-isel, and return its equivalent machine type in VT.
256259698Sdim// FIXME: Copied directly from ARM -- factor into base class?
257259698Sdimbool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258259698Sdim  EVT Evt = TLI.getValueType(Ty, true);
259259698Sdim
260259698Sdim  // Only handle simple types.
261259698Sdim  if (Evt == MVT::Other || !Evt.isSimple()) return false;
262259698Sdim  VT = Evt.getSimpleVT();
263259698Sdim
264259698Sdim  // Handle all legal types, i.e. a register that will directly hold this
265259698Sdim  // value.
266259698Sdim  return TLI.isTypeLegal(VT);
267259698Sdim}
268259698Sdim
269259698Sdim// Determine whether the type Ty is simple enough to be handled by
270259698Sdim// fast-isel as a load target, and return its equivalent machine type in VT.
271259698Sdimbool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272259698Sdim  if (isTypeLegal(Ty, VT)) return true;
273259698Sdim
274259698Sdim  // If this is a type than can be sign or zero-extended to a basic operation
275259698Sdim  // go ahead and accept it now.
276259698Sdim  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277259698Sdim    return true;
278259698Sdim  }
279259698Sdim
280259698Sdim  return false;
281259698Sdim}
282259698Sdim
283259698Sdim// Given a value Obj, create an Address object Addr that represents its
284259698Sdim// address.  Return false if we can't handle it.
285259698Sdimbool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
286259698Sdim  const User *U = NULL;
287259698Sdim  unsigned Opcode = Instruction::UserOp1;
288259698Sdim  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
289259698Sdim    // Don't walk into other basic blocks unless the object is an alloca from
290259698Sdim    // another block, otherwise it may not have a virtual register assigned.
291259698Sdim    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
292259698Sdim        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
293259698Sdim      Opcode = I->getOpcode();
294259698Sdim      U = I;
295259698Sdim    }
296259698Sdim  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
297259698Sdim    Opcode = C->getOpcode();
298259698Sdim    U = C;
299259698Sdim  }
300259698Sdim
301259698Sdim  switch (Opcode) {
302259698Sdim    default:
303259698Sdim      break;
304259698Sdim    case Instruction::BitCast:
305259698Sdim      // Look through bitcasts.
306259698Sdim      return PPCComputeAddress(U->getOperand(0), Addr);
307259698Sdim    case Instruction::IntToPtr:
308259698Sdim      // Look past no-op inttoptrs.
309259698Sdim      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
310259698Sdim        return PPCComputeAddress(U->getOperand(0), Addr);
311259698Sdim      break;
312259698Sdim    case Instruction::PtrToInt:
313259698Sdim      // Look past no-op ptrtoints.
314259698Sdim      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
315259698Sdim        return PPCComputeAddress(U->getOperand(0), Addr);
316259698Sdim      break;
317259698Sdim    case Instruction::GetElementPtr: {
318259698Sdim      Address SavedAddr = Addr;
319259698Sdim      long TmpOffset = Addr.Offset;
320259698Sdim
321259698Sdim      // Iterate through the GEP folding the constants into offsets where
322259698Sdim      // we can.
323259698Sdim      gep_type_iterator GTI = gep_type_begin(U);
324259698Sdim      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
325259698Sdim           II != IE; ++II, ++GTI) {
326259698Sdim        const Value *Op = *II;
327259698Sdim        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
328259698Sdim          const StructLayout *SL = TD.getStructLayout(STy);
329259698Sdim          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
330259698Sdim          TmpOffset += SL->getElementOffset(Idx);
331259698Sdim        } else {
332259698Sdim          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
333259698Sdim          for (;;) {
334259698Sdim            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
335259698Sdim              // Constant-offset addressing.
336259698Sdim              TmpOffset += CI->getSExtValue() * S;
337259698Sdim              break;
338259698Sdim            }
339259698Sdim            if (canFoldAddIntoGEP(U, Op)) {
340259698Sdim              // A compatible add with a constant operand. Fold the constant.
341259698Sdim              ConstantInt *CI =
342259698Sdim              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
343259698Sdim              TmpOffset += CI->getSExtValue() * S;
344259698Sdim              // Iterate on the other operand.
345259698Sdim              Op = cast<AddOperator>(Op)->getOperand(0);
346259698Sdim              continue;
347259698Sdim            }
348259698Sdim            // Unsupported
349259698Sdim            goto unsupported_gep;
350259698Sdim          }
351259698Sdim        }
352259698Sdim      }
353259698Sdim
354259698Sdim      // Try to grab the base operand now.
355259698Sdim      Addr.Offset = TmpOffset;
356259698Sdim      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
357259698Sdim
358259698Sdim      // We failed, restore everything and try the other options.
359259698Sdim      Addr = SavedAddr;
360259698Sdim
361259698Sdim      unsupported_gep:
362259698Sdim      break;
363259698Sdim    }
364259698Sdim    case Instruction::Alloca: {
365259698Sdim      const AllocaInst *AI = cast<AllocaInst>(Obj);
366259698Sdim      DenseMap<const AllocaInst*, int>::iterator SI =
367259698Sdim        FuncInfo.StaticAllocaMap.find(AI);
368259698Sdim      if (SI != FuncInfo.StaticAllocaMap.end()) {
369259698Sdim        Addr.BaseType = Address::FrameIndexBase;
370259698Sdim        Addr.Base.FI = SI->second;
371259698Sdim        return true;
372259698Sdim      }
373259698Sdim      break;
374259698Sdim    }
375259698Sdim  }
376259698Sdim
377259698Sdim  // FIXME: References to parameters fall through to the behavior
378259698Sdim  // below.  They should be able to reference a frame index since
379259698Sdim  // they are stored to the stack, so we can get "ld rx, offset(r1)"
380259698Sdim  // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
381259698Sdim  // just contain the parameter.  Try to handle this with a FI.
382259698Sdim
383259698Sdim  // Try to get this in a register if nothing else has worked.
384259698Sdim  if (Addr.Base.Reg == 0)
385259698Sdim    Addr.Base.Reg = getRegForValue(Obj);
386259698Sdim
387259698Sdim  // Prevent assignment of base register to X0, which is inappropriate
388259698Sdim  // for loads and stores alike.
389259698Sdim  if (Addr.Base.Reg != 0)
390259698Sdim    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
391259698Sdim
392259698Sdim  return Addr.Base.Reg != 0;
393259698Sdim}
394259698Sdim
395259698Sdim// Fix up some addresses that can't be used directly.  For example, if
396259698Sdim// an offset won't fit in an instruction field, we may need to move it
397259698Sdim// into an index register.
398259698Sdimvoid PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
399259698Sdim                                     unsigned &IndexReg) {
400259698Sdim
401259698Sdim  // Check whether the offset fits in the instruction field.
402259698Sdim  if (!isInt<16>(Addr.Offset))
403259698Sdim    UseOffset = false;
404259698Sdim
405259698Sdim  // If this is a stack pointer and the offset needs to be simplified then
406259698Sdim  // put the alloca address into a register, set the base type back to
407259698Sdim  // register and continue. This should almost never happen.
408259698Sdim  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
409259698Sdim    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
410259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
411259698Sdim            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
412259698Sdim    Addr.Base.Reg = ResultReg;
413259698Sdim    Addr.BaseType = Address::RegBase;
414259698Sdim  }
415259698Sdim
416259698Sdim  if (!UseOffset) {
417259698Sdim    IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
418259698Sdim                             : Type::getInt64Ty(*Context));
419259698Sdim    const ConstantInt *Offset =
420259698Sdim      ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
421259698Sdim    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
422259698Sdim    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
423259698Sdim  }
424259698Sdim}
425259698Sdim
426259698Sdim// Emit a load instruction if possible, returning true if we succeeded,
427259698Sdim// otherwise false.  See commentary below for how the register class of
428259698Sdim// the load is determined.
429259698Sdimbool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
430259698Sdim                              const TargetRegisterClass *RC,
431259698Sdim                              bool IsZExt, unsigned FP64LoadOpc) {
432259698Sdim  unsigned Opc;
433259698Sdim  bool UseOffset = true;
434259698Sdim
435259698Sdim  // If ResultReg is given, it determines the register class of the load.
436259698Sdim  // Otherwise, RC is the register class to use.  If the result of the
437259698Sdim  // load isn't anticipated in this block, both may be zero, in which
438259698Sdim  // case we must make a conservative guess.  In particular, don't assign
439259698Sdim  // R0 or X0 to the result register, as the result may be used in a load,
440259698Sdim  // store, add-immediate, or isel that won't permit this.  (Though
441259698Sdim  // perhaps the spill and reload of live-exit values would handle this?)
442259698Sdim  const TargetRegisterClass *UseRC =
443259698Sdim    (ResultReg ? MRI.getRegClass(ResultReg) :
444259698Sdim     (RC ? RC :
445259698Sdim      (VT == MVT::f64 ? &PPC::F8RCRegClass :
446259698Sdim       (VT == MVT::f32 ? &PPC::F4RCRegClass :
447259698Sdim        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
448259698Sdim         &PPC::GPRC_and_GPRC_NOR0RegClass)))));
449259698Sdim
450259698Sdim  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
451259698Sdim
452259698Sdim  switch (VT.SimpleTy) {
453259698Sdim    default: // e.g., vector types not handled
454259698Sdim      return false;
455259698Sdim    case MVT::i8:
456259698Sdim      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
457259698Sdim      break;
458259698Sdim    case MVT::i16:
459259698Sdim      Opc = (IsZExt ?
460259698Sdim             (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
461259698Sdim             (Is32BitInt ? PPC::LHA : PPC::LHA8));
462259698Sdim      break;
463259698Sdim    case MVT::i32:
464259698Sdim      Opc = (IsZExt ?
465259698Sdim             (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
466259698Sdim             (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
467259698Sdim      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
468259698Sdim        UseOffset = false;
469259698Sdim      break;
470259698Sdim    case MVT::i64:
471259698Sdim      Opc = PPC::LD;
472259698Sdim      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
473259698Sdim             "64-bit load with 32-bit target??");
474259698Sdim      UseOffset = ((Addr.Offset & 3) == 0);
475259698Sdim      break;
476259698Sdim    case MVT::f32:
477259698Sdim      Opc = PPC::LFS;
478259698Sdim      break;
479259698Sdim    case MVT::f64:
480259698Sdim      Opc = FP64LoadOpc;
481259698Sdim      break;
482259698Sdim  }
483259698Sdim
484259698Sdim  // If necessary, materialize the offset into a register and use
485259698Sdim  // the indexed form.  Also handle stack pointers with special needs.
486259698Sdim  unsigned IndexReg = 0;
487259698Sdim  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
488259698Sdim  if (ResultReg == 0)
489259698Sdim    ResultReg = createResultReg(UseRC);
490259698Sdim
491259698Sdim  // Note: If we still have a frame index here, we know the offset is
492259698Sdim  // in range, as otherwise PPCSimplifyAddress would have converted it
493259698Sdim  // into a RegBase.
494259698Sdim  if (Addr.BaseType == Address::FrameIndexBase) {
495259698Sdim
496259698Sdim    MachineMemOperand *MMO =
497259698Sdim      FuncInfo.MF->getMachineMemOperand(
498259698Sdim        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
499259698Sdim        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
500259698Sdim        MFI.getObjectAlignment(Addr.Base.FI));
501259698Sdim
502259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
503259698Sdim      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
504259698Sdim
505259698Sdim  // Base reg with offset in range.
506259698Sdim  } else if (UseOffset) {
507259698Sdim
508259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
509259698Sdim      .addImm(Addr.Offset).addReg(Addr.Base.Reg);
510259698Sdim
511259698Sdim  // Indexed form.
512259698Sdim  } else {
513259698Sdim    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
514259698Sdim    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
515259698Sdim    // is hard to get at.
516259698Sdim    switch (Opc) {
517259698Sdim      default:        llvm_unreachable("Unexpected opcode!");
518259698Sdim      case PPC::LBZ:    Opc = PPC::LBZX;    break;
519259698Sdim      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
520259698Sdim      case PPC::LHZ:    Opc = PPC::LHZX;    break;
521259698Sdim      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
522259698Sdim      case PPC::LHA:    Opc = PPC::LHAX;    break;
523259698Sdim      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
524259698Sdim      case PPC::LWZ:    Opc = PPC::LWZX;    break;
525259698Sdim      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
526259698Sdim      case PPC::LWA:    Opc = PPC::LWAX;    break;
527259698Sdim      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
528259698Sdim      case PPC::LD:     Opc = PPC::LDX;     break;
529259698Sdim      case PPC::LFS:    Opc = PPC::LFSX;    break;
530259698Sdim      case PPC::LFD:    Opc = PPC::LFDX;    break;
531259698Sdim    }
532259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
533259698Sdim      .addReg(Addr.Base.Reg).addReg(IndexReg);
534259698Sdim  }
535259698Sdim
536259698Sdim  return true;
537259698Sdim}
538259698Sdim
539259698Sdim// Attempt to fast-select a load instruction.
540259698Sdimbool PPCFastISel::SelectLoad(const Instruction *I) {
541259698Sdim  // FIXME: No atomic loads are supported.
542259698Sdim  if (cast<LoadInst>(I)->isAtomic())
543259698Sdim    return false;
544259698Sdim
545259698Sdim  // Verify we have a legal type before going any further.
546259698Sdim  MVT VT;
547259698Sdim  if (!isLoadTypeLegal(I->getType(), VT))
548259698Sdim    return false;
549259698Sdim
550259698Sdim  // See if we can handle this address.
551259698Sdim  Address Addr;
552259698Sdim  if (!PPCComputeAddress(I->getOperand(0), Addr))
553259698Sdim    return false;
554259698Sdim
555259698Sdim  // Look at the currently assigned register for this instruction
556259698Sdim  // to determine the required register class.  This is necessary
557259698Sdim  // to constrain RA from using R0/X0 when this is not legal.
558259698Sdim  unsigned AssignedReg = FuncInfo.ValueMap[I];
559259698Sdim  const TargetRegisterClass *RC =
560259698Sdim    AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
561259698Sdim
562259698Sdim  unsigned ResultReg = 0;
563259698Sdim  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
564259698Sdim    return false;
565259698Sdim  UpdateValueMap(I, ResultReg);
566259698Sdim  return true;
567259698Sdim}
568259698Sdim
569259698Sdim// Emit a store instruction to store SrcReg at Addr.
570259698Sdimbool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
571259698Sdim  assert(SrcReg && "Nothing to store!");
572259698Sdim  unsigned Opc;
573259698Sdim  bool UseOffset = true;
574259698Sdim
575259698Sdim  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
576259698Sdim  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
577259698Sdim
578259698Sdim  switch (VT.SimpleTy) {
579259698Sdim    default: // e.g., vector types not handled
580259698Sdim      return false;
581259698Sdim    case MVT::i8:
582259698Sdim      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
583259698Sdim      break;
584259698Sdim    case MVT::i16:
585259698Sdim      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
586259698Sdim      break;
587259698Sdim    case MVT::i32:
588259698Sdim      assert(Is32BitInt && "Not GPRC for i32??");
589259698Sdim      Opc = PPC::STW;
590259698Sdim      break;
591259698Sdim    case MVT::i64:
592259698Sdim      Opc = PPC::STD;
593259698Sdim      UseOffset = ((Addr.Offset & 3) == 0);
594259698Sdim      break;
595259698Sdim    case MVT::f32:
596259698Sdim      Opc = PPC::STFS;
597259698Sdim      break;
598259698Sdim    case MVT::f64:
599259698Sdim      Opc = PPC::STFD;
600259698Sdim      break;
601259698Sdim  }
602259698Sdim
603259698Sdim  // If necessary, materialize the offset into a register and use
604259698Sdim  // the indexed form.  Also handle stack pointers with special needs.
605259698Sdim  unsigned IndexReg = 0;
606259698Sdim  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
607259698Sdim
608259698Sdim  // Note: If we still have a frame index here, we know the offset is
609259698Sdim  // in range, as otherwise PPCSimplifyAddress would have converted it
610259698Sdim  // into a RegBase.
611259698Sdim  if (Addr.BaseType == Address::FrameIndexBase) {
612259698Sdim    MachineMemOperand *MMO =
613259698Sdim      FuncInfo.MF->getMachineMemOperand(
614259698Sdim        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
615259698Sdim        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
616259698Sdim        MFI.getObjectAlignment(Addr.Base.FI));
617259698Sdim
618259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
619259698Sdim      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
620259698Sdim
621259698Sdim  // Base reg with offset in range.
622259698Sdim  } else if (UseOffset)
623259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
624259698Sdim      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
625259698Sdim
626259698Sdim  // Indexed form.
627259698Sdim  else {
628259698Sdim    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
629259698Sdim    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
630259698Sdim    // is hard to get at.
631259698Sdim    switch (Opc) {
632259698Sdim      default:        llvm_unreachable("Unexpected opcode!");
633259698Sdim      case PPC::STB:  Opc = PPC::STBX;  break;
634259698Sdim      case PPC::STH : Opc = PPC::STHX;  break;
635259698Sdim      case PPC::STW : Opc = PPC::STWX;  break;
636259698Sdim      case PPC::STB8: Opc = PPC::STBX8; break;
637259698Sdim      case PPC::STH8: Opc = PPC::STHX8; break;
638259698Sdim      case PPC::STW8: Opc = PPC::STWX8; break;
639259698Sdim      case PPC::STD:  Opc = PPC::STDX;  break;
640259698Sdim      case PPC::STFS: Opc = PPC::STFSX; break;
641259698Sdim      case PPC::STFD: Opc = PPC::STFDX; break;
642259698Sdim    }
643259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
644259698Sdim      .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
645259698Sdim  }
646259698Sdim
647259698Sdim  return true;
648259698Sdim}
649259698Sdim
650259698Sdim// Attempt to fast-select a store instruction.
651259698Sdimbool PPCFastISel::SelectStore(const Instruction *I) {
652259698Sdim  Value *Op0 = I->getOperand(0);
653259698Sdim  unsigned SrcReg = 0;
654259698Sdim
655259698Sdim  // FIXME: No atomics loads are supported.
656259698Sdim  if (cast<StoreInst>(I)->isAtomic())
657259698Sdim    return false;
658259698Sdim
659259698Sdim  // Verify we have a legal type before going any further.
660259698Sdim  MVT VT;
661259698Sdim  if (!isLoadTypeLegal(Op0->getType(), VT))
662259698Sdim    return false;
663259698Sdim
664259698Sdim  // Get the value to be stored into a register.
665259698Sdim  SrcReg = getRegForValue(Op0);
666259698Sdim  if (SrcReg == 0)
667259698Sdim    return false;
668259698Sdim
669259698Sdim  // See if we can handle this address.
670259698Sdim  Address Addr;
671259698Sdim  if (!PPCComputeAddress(I->getOperand(1), Addr))
672259698Sdim    return false;
673259698Sdim
674259698Sdim  if (!PPCEmitStore(VT, SrcReg, Addr))
675259698Sdim    return false;
676259698Sdim
677259698Sdim  return true;
678259698Sdim}
679259698Sdim
680259698Sdim// Attempt to fast-select a branch instruction.
681259698Sdimbool PPCFastISel::SelectBranch(const Instruction *I) {
682259698Sdim  const BranchInst *BI = cast<BranchInst>(I);
683259698Sdim  MachineBasicBlock *BrBB = FuncInfo.MBB;
684259698Sdim  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
685259698Sdim  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
686259698Sdim
687259698Sdim  // For now, just try the simplest case where it's fed by a compare.
688259698Sdim  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
689259698Sdim    Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
690259698Sdim    if (!OptPPCPred)
691259698Sdim      return false;
692259698Sdim
693259698Sdim    PPC::Predicate PPCPred = OptPPCPred.getValue();
694259698Sdim
695259698Sdim    // Take advantage of fall-through opportunities.
696259698Sdim    if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
697259698Sdim      std::swap(TBB, FBB);
698259698Sdim      PPCPred = PPC::InvertPredicate(PPCPred);
699259698Sdim    }
700259698Sdim
701259698Sdim    unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
702259698Sdim
703259698Sdim    if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
704259698Sdim                    CondReg))
705259698Sdim      return false;
706259698Sdim
707259698Sdim    BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
708259698Sdim      .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
709259698Sdim    FastEmitBranch(FBB, DL);
710259698Sdim    FuncInfo.MBB->addSuccessor(TBB);
711259698Sdim    return true;
712259698Sdim
713259698Sdim  } else if (const ConstantInt *CI =
714259698Sdim             dyn_cast<ConstantInt>(BI->getCondition())) {
715259698Sdim    uint64_t Imm = CI->getZExtValue();
716259698Sdim    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
717259698Sdim    FastEmitBranch(Target, DL);
718259698Sdim    return true;
719259698Sdim  }
720259698Sdim
721259698Sdim  // FIXME: ARM looks for a case where the block containing the compare
722259698Sdim  // has been split from the block containing the branch.  If this happens,
723259698Sdim  // there is a vreg available containing the result of the compare.  I'm
724259698Sdim  // not sure we can do much, as we've lost the predicate information with
725259698Sdim  // the compare instruction -- we have a 4-bit CR but don't know which bit
726259698Sdim  // to test here.
727259698Sdim  return false;
728259698Sdim}
729259698Sdim
730259698Sdim// Attempt to emit a compare of the two source values.  Signed and unsigned
731259698Sdim// comparisons are supported.  Return false if we can't handle it.
732259698Sdimbool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
733259698Sdim                             bool IsZExt, unsigned DestReg) {
734259698Sdim  Type *Ty = SrcValue1->getType();
735259698Sdim  EVT SrcEVT = TLI.getValueType(Ty, true);
736259698Sdim  if (!SrcEVT.isSimple())
737259698Sdim    return false;
738259698Sdim  MVT SrcVT = SrcEVT.getSimpleVT();
739259698Sdim
740259698Sdim  // See if operand 2 is an immediate encodeable in the compare.
741259698Sdim  // FIXME: Operands are not in canonical order at -O0, so an immediate
742259698Sdim  // operand in position 1 is a lost opportunity for now.  We are
743259698Sdim  // similar to ARM in this regard.
744259698Sdim  long Imm = 0;
745259698Sdim  bool UseImm = false;
746259698Sdim
747259698Sdim  // Only 16-bit integer constants can be represented in compares for
748259698Sdim  // PowerPC.  Others will be materialized into a register.
749259698Sdim  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
750259698Sdim    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
751259698Sdim        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
752259698Sdim      const APInt &CIVal = ConstInt->getValue();
753259698Sdim      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
754259698Sdim      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
755259698Sdim        UseImm = true;
756259698Sdim    }
757259698Sdim  }
758259698Sdim
759259698Sdim  unsigned CmpOpc;
760259698Sdim  bool NeedsExt = false;
761259698Sdim  switch (SrcVT.SimpleTy) {
762259698Sdim    default: return false;
763259698Sdim    case MVT::f32:
764259698Sdim      CmpOpc = PPC::FCMPUS;
765259698Sdim      break;
766259698Sdim    case MVT::f64:
767259698Sdim      CmpOpc = PPC::FCMPUD;
768259698Sdim      break;
769259698Sdim    case MVT::i1:
770259698Sdim    case MVT::i8:
771259698Sdim    case MVT::i16:
772259698Sdim      NeedsExt = true;
773259698Sdim      // Intentional fall-through.
774259698Sdim    case MVT::i32:
775259698Sdim      if (!UseImm)
776259698Sdim        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
777259698Sdim      else
778259698Sdim        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
779259698Sdim      break;
780259698Sdim    case MVT::i64:
781259698Sdim      if (!UseImm)
782259698Sdim        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
783259698Sdim      else
784259698Sdim        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
785259698Sdim      break;
786259698Sdim  }
787259698Sdim
788259698Sdim  unsigned SrcReg1 = getRegForValue(SrcValue1);
789259698Sdim  if (SrcReg1 == 0)
790259698Sdim    return false;
791259698Sdim
792259698Sdim  unsigned SrcReg2 = 0;
793259698Sdim  if (!UseImm) {
794259698Sdim    SrcReg2 = getRegForValue(SrcValue2);
795259698Sdim    if (SrcReg2 == 0)
796259698Sdim      return false;
797259698Sdim  }
798259698Sdim
799259698Sdim  if (NeedsExt) {
800259698Sdim    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
801259698Sdim    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
802259698Sdim      return false;
803259698Sdim    SrcReg1 = ExtReg;
804259698Sdim
805259698Sdim    if (!UseImm) {
806259698Sdim      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
807259698Sdim      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
808259698Sdim        return false;
809259698Sdim      SrcReg2 = ExtReg;
810259698Sdim    }
811259698Sdim  }
812259698Sdim
813259698Sdim  if (!UseImm)
814259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
815259698Sdim      .addReg(SrcReg1).addReg(SrcReg2);
816259698Sdim  else
817259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
818259698Sdim      .addReg(SrcReg1).addImm(Imm);
819259698Sdim
820259698Sdim  return true;
821259698Sdim}
822259698Sdim
823259698Sdim// Attempt to fast-select a floating-point extend instruction.
824259698Sdimbool PPCFastISel::SelectFPExt(const Instruction *I) {
825259698Sdim  Value *Src  = I->getOperand(0);
826259698Sdim  EVT SrcVT  = TLI.getValueType(Src->getType(), true);
827259698Sdim  EVT DestVT = TLI.getValueType(I->getType(), true);
828259698Sdim
829259698Sdim  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
830259698Sdim    return false;
831259698Sdim
832259698Sdim  unsigned SrcReg = getRegForValue(Src);
833259698Sdim  if (!SrcReg)
834259698Sdim    return false;
835259698Sdim
836259698Sdim  // No code is generated for a FP extend.
837259698Sdim  UpdateValueMap(I, SrcReg);
838259698Sdim  return true;
839259698Sdim}
840259698Sdim
841259698Sdim// Attempt to fast-select a floating-point truncate instruction.
842259698Sdimbool PPCFastISel::SelectFPTrunc(const Instruction *I) {
843259698Sdim  Value *Src  = I->getOperand(0);
844259698Sdim  EVT SrcVT  = TLI.getValueType(Src->getType(), true);
845259698Sdim  EVT DestVT = TLI.getValueType(I->getType(), true);
846259698Sdim
847259698Sdim  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
848259698Sdim    return false;
849259698Sdim
850259698Sdim  unsigned SrcReg = getRegForValue(Src);
851259698Sdim  if (!SrcReg)
852259698Sdim    return false;
853259698Sdim
854259698Sdim  // Round the result to single precision.
855259698Sdim  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
856259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
857259698Sdim    .addReg(SrcReg);
858259698Sdim
859259698Sdim  UpdateValueMap(I, DestReg);
860259698Sdim  return true;
861259698Sdim}
862259698Sdim
863259698Sdim// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
864259698Sdim// FIXME: When direct register moves are implemented (see PowerISA 2.08),
865259698Sdim// those should be used instead of moving via a stack slot when the
866259698Sdim// subtarget permits.
867259698Sdim// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
868259698Sdim// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
869259698Sdim// case to 8 bytes which produces tighter code but wastes stack space.
870259698Sdimunsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
871259698Sdim                                     bool IsSigned) {
872259698Sdim
873259698Sdim  // If necessary, extend 32-bit int to 64-bit.
874259698Sdim  if (SrcVT == MVT::i32) {
875259698Sdim    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
876259698Sdim    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
877259698Sdim      return 0;
878259698Sdim    SrcReg = TmpReg;
879259698Sdim  }
880259698Sdim
881259698Sdim  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
882259698Sdim  Address Addr;
883259698Sdim  Addr.BaseType = Address::FrameIndexBase;
884259698Sdim  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
885259698Sdim
886259698Sdim  // Store the value from the GPR.
887259698Sdim  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
888259698Sdim    return 0;
889259698Sdim
890259698Sdim  // Load the integer value into an FPR.  The kind of load used depends
891259698Sdim  // on a number of conditions.
892259698Sdim  unsigned LoadOpc = PPC::LFD;
893259698Sdim
894259698Sdim  if (SrcVT == MVT::i32) {
895266715Sdim    if (!IsSigned) {
896259698Sdim      LoadOpc = PPC::LFIWZX;
897266715Sdim      Addr.Offset = 4;
898266715Sdim    } else if (PPCSubTarget.hasLFIWAX()) {
899259698Sdim      LoadOpc = PPC::LFIWAX;
900266715Sdim      Addr.Offset = 4;
901266715Sdim    }
902259698Sdim  }
903259698Sdim
904259698Sdim  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
905259698Sdim  unsigned ResultReg = 0;
906259698Sdim  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
907259698Sdim    return 0;
908259698Sdim
909259698Sdim  return ResultReg;
910259698Sdim}
911259698Sdim
912259698Sdim// Attempt to fast-select an integer-to-floating-point conversion.
913259698Sdimbool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
914259698Sdim  MVT DstVT;
915259698Sdim  Type *DstTy = I->getType();
916259698Sdim  if (!isTypeLegal(DstTy, DstVT))
917259698Sdim    return false;
918259698Sdim
919259698Sdim  if (DstVT != MVT::f32 && DstVT != MVT::f64)
920259698Sdim    return false;
921259698Sdim
922259698Sdim  Value *Src = I->getOperand(0);
923259698Sdim  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
924259698Sdim  if (!SrcEVT.isSimple())
925259698Sdim    return false;
926259698Sdim
927259698Sdim  MVT SrcVT = SrcEVT.getSimpleVT();
928259698Sdim
929259698Sdim  if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
930259698Sdim      SrcVT != MVT::i32 && SrcVT != MVT::i64)
931259698Sdim    return false;
932259698Sdim
933259698Sdim  unsigned SrcReg = getRegForValue(Src);
934259698Sdim  if (SrcReg == 0)
935259698Sdim    return false;
936259698Sdim
937259698Sdim  // We can only lower an unsigned convert if we have the newer
938259698Sdim  // floating-point conversion operations.
939259698Sdim  if (!IsSigned && !PPCSubTarget.hasFPCVT())
940259698Sdim    return false;
941259698Sdim
942259698Sdim  // FIXME: For now we require the newer floating-point conversion operations
943259698Sdim  // (which are present only on P7 and A2 server models) when converting
944259698Sdim  // to single-precision float.  Otherwise we have to generate a lot of
945259698Sdim  // fiddly code to avoid double rounding.  If necessary, the fiddly code
946259698Sdim  // can be found in PPCTargetLowering::LowerINT_TO_FP().
947259698Sdim  if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
948259698Sdim    return false;
949259698Sdim
950259698Sdim  // Extend the input if necessary.
951259698Sdim  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
952259698Sdim    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
953259698Sdim    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
954259698Sdim      return false;
955259698Sdim    SrcVT = MVT::i64;
956259698Sdim    SrcReg = TmpReg;
957259698Sdim  }
958259698Sdim
959259698Sdim  // Move the integer value to an FPR.
960259698Sdim  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
961259698Sdim  if (FPReg == 0)
962259698Sdim    return false;
963259698Sdim
964259698Sdim  // Determine the opcode for the conversion.
965259698Sdim  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
966259698Sdim  unsigned DestReg = createResultReg(RC);
967259698Sdim  unsigned Opc;
968259698Sdim
969259698Sdim  if (DstVT == MVT::f32)
970259698Sdim    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
971259698Sdim  else
972259698Sdim    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
973259698Sdim
974259698Sdim  // Generate the convert.
975259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
976259698Sdim    .addReg(FPReg);
977259698Sdim
978259698Sdim  UpdateValueMap(I, DestReg);
979259698Sdim  return true;
980259698Sdim}
981259698Sdim
982259698Sdim// Move the floating-point value in SrcReg into an integer destination
983259698Sdim// register, and return the register (or zero if we can't handle it).
984259698Sdim// FIXME: When direct register moves are implemented (see PowerISA 2.08),
985259698Sdim// those should be used instead of moving via a stack slot when the
986259698Sdim// subtarget permits.
987259698Sdimunsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
988259698Sdim                                      unsigned SrcReg, bool IsSigned) {
989259698Sdim  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
990259698Sdim  // Note that if have STFIWX available, we could use a 4-byte stack
991259698Sdim  // slot for i32, but this being fast-isel we'll just go with the
992259698Sdim  // easiest code gen possible.
993259698Sdim  Address Addr;
994259698Sdim  Addr.BaseType = Address::FrameIndexBase;
995259698Sdim  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
996259698Sdim
997259698Sdim  // Store the value from the FPR.
998259698Sdim  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
999259698Sdim    return 0;
1000259698Sdim
1001259698Sdim  // Reload it into a GPR.  If we want an i32, modify the address
1002259698Sdim  // to have a 4-byte offset so we load from the right place.
1003259698Sdim  if (VT == MVT::i32)
1004259698Sdim    Addr.Offset = 4;
1005259698Sdim
1006259698Sdim  // Look at the currently assigned register for this instruction
1007259698Sdim  // to determine the required register class.
1008259698Sdim  unsigned AssignedReg = FuncInfo.ValueMap[I];
1009259698Sdim  const TargetRegisterClass *RC =
1010259698Sdim    AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
1011259698Sdim
1012259698Sdim  unsigned ResultReg = 0;
1013259698Sdim  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1014259698Sdim    return 0;
1015259698Sdim
1016259698Sdim  return ResultReg;
1017259698Sdim}
1018259698Sdim
1019259698Sdim// Attempt to fast-select a floating-point-to-integer conversion.
1020259698Sdimbool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
1021259698Sdim  MVT DstVT, SrcVT;
1022259698Sdim  Type *DstTy = I->getType();
1023259698Sdim  if (!isTypeLegal(DstTy, DstVT))
1024259698Sdim    return false;
1025259698Sdim
1026259698Sdim  if (DstVT != MVT::i32 && DstVT != MVT::i64)
1027259698Sdim    return false;
1028259698Sdim
1029268065Sdim  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
1030268065Sdim  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT())
1031268065Sdim    return false;
1032268065Sdim
1033259698Sdim  Value *Src = I->getOperand(0);
1034259698Sdim  Type *SrcTy = Src->getType();
1035259698Sdim  if (!isTypeLegal(SrcTy, SrcVT))
1036259698Sdim    return false;
1037259698Sdim
1038259698Sdim  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1039259698Sdim    return false;
1040259698Sdim
1041259698Sdim  unsigned SrcReg = getRegForValue(Src);
1042259698Sdim  if (SrcReg == 0)
1043259698Sdim    return false;
1044259698Sdim
1045259698Sdim  // Convert f32 to f64 if necessary.  This is just a meaningless copy
1046259698Sdim  // to get the register class right.  COPY_TO_REGCLASS is needed since
1047259698Sdim  // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
1048259698Sdim  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
1049259698Sdim  if (InRC == &PPC::F4RCRegClass) {
1050259698Sdim    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
1051259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1052259698Sdim            TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
1053259698Sdim      .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
1054259698Sdim    SrcReg = TmpReg;
1055259698Sdim  }
1056259698Sdim
1057259698Sdim  // Determine the opcode for the conversion, which takes place
1058259698Sdim  // entirely within FPRs.
1059259698Sdim  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
1060259698Sdim  unsigned Opc;
1061259698Sdim
1062259698Sdim  if (DstVT == MVT::i32)
1063259698Sdim    if (IsSigned)
1064259698Sdim      Opc = PPC::FCTIWZ;
1065259698Sdim    else
1066259698Sdim      Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1067259698Sdim  else
1068259698Sdim    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1069259698Sdim
1070259698Sdim  // Generate the convert.
1071259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
1072259698Sdim    .addReg(SrcReg);
1073259698Sdim
1074259698Sdim  // Now move the integer value from a float register to an integer register.
1075259698Sdim  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
1076259698Sdim  if (IntReg == 0)
1077259698Sdim    return false;
1078259698Sdim
1079259698Sdim  UpdateValueMap(I, IntReg);
1080259698Sdim  return true;
1081259698Sdim}
1082259698Sdim
1083259698Sdim// Attempt to fast-select a binary integer operation that isn't already
1084259698Sdim// handled automatically.
1085259698Sdimbool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1086259698Sdim  EVT DestVT  = TLI.getValueType(I->getType(), true);
1087259698Sdim
1088259698Sdim  // We can get here in the case when we have a binary operation on a non-legal
1089259698Sdim  // type and the target independent selector doesn't know how to handle it.
1090259698Sdim  if (DestVT != MVT::i16 && DestVT != MVT::i8)
1091259698Sdim    return false;
1092259698Sdim
1093259698Sdim  // Look at the currently assigned register for this instruction
1094259698Sdim  // to determine the required register class.  If there is no register,
1095259698Sdim  // make a conservative choice (don't assign R0).
1096259698Sdim  unsigned AssignedReg = FuncInfo.ValueMap[I];
1097259698Sdim  const TargetRegisterClass *RC =
1098259698Sdim    (AssignedReg ? MRI.getRegClass(AssignedReg) :
1099259698Sdim     &PPC::GPRC_and_GPRC_NOR0RegClass);
1100259698Sdim  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1101259698Sdim
1102259698Sdim  unsigned Opc;
1103259698Sdim  switch (ISDOpcode) {
1104259698Sdim    default: return false;
1105259698Sdim    case ISD::ADD:
1106259698Sdim      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1107259698Sdim      break;
1108259698Sdim    case ISD::OR:
1109259698Sdim      Opc = IsGPRC ? PPC::OR : PPC::OR8;
1110259698Sdim      break;
1111259698Sdim    case ISD::SUB:
1112259698Sdim      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1113259698Sdim      break;
1114259698Sdim  }
1115259698Sdim
1116259698Sdim  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
1117259698Sdim  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1118259698Sdim  if (SrcReg1 == 0) return false;
1119259698Sdim
1120259698Sdim  // Handle case of small immediate operand.
1121259698Sdim  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
1122259698Sdim    const APInt &CIVal = ConstInt->getValue();
1123259698Sdim    int Imm = (int)CIVal.getSExtValue();
1124259698Sdim    bool UseImm = true;
1125259698Sdim    if (isInt<16>(Imm)) {
1126259698Sdim      switch (Opc) {
1127259698Sdim        default:
1128259698Sdim          llvm_unreachable("Missing case!");
1129259698Sdim        case PPC::ADD4:
1130259698Sdim          Opc = PPC::ADDI;
1131259698Sdim          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1132259698Sdim          break;
1133259698Sdim        case PPC::ADD8:
1134259698Sdim          Opc = PPC::ADDI8;
1135259698Sdim          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1136259698Sdim          break;
1137259698Sdim        case PPC::OR:
1138259698Sdim          Opc = PPC::ORI;
1139259698Sdim          break;
1140259698Sdim        case PPC::OR8:
1141259698Sdim          Opc = PPC::ORI8;
1142259698Sdim          break;
1143259698Sdim        case PPC::SUBF:
1144259698Sdim          if (Imm == -32768)
1145259698Sdim            UseImm = false;
1146259698Sdim          else {
1147259698Sdim            Opc = PPC::ADDI;
1148259698Sdim            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1149259698Sdim            Imm = -Imm;
1150259698Sdim          }
1151259698Sdim          break;
1152259698Sdim        case PPC::SUBF8:
1153259698Sdim          if (Imm == -32768)
1154259698Sdim            UseImm = false;
1155259698Sdim          else {
1156259698Sdim            Opc = PPC::ADDI8;
1157259698Sdim            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1158259698Sdim            Imm = -Imm;
1159259698Sdim          }
1160259698Sdim          break;
1161259698Sdim      }
1162259698Sdim
1163259698Sdim      if (UseImm) {
1164259698Sdim        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
1165259698Sdim          .addReg(SrcReg1).addImm(Imm);
1166259698Sdim        UpdateValueMap(I, ResultReg);
1167259698Sdim        return true;
1168259698Sdim      }
1169259698Sdim    }
1170259698Sdim  }
1171259698Sdim
1172259698Sdim  // Reg-reg case.
1173259698Sdim  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1174259698Sdim  if (SrcReg2 == 0) return false;
1175259698Sdim
1176259698Sdim  // Reverse operands for subtract-from.
1177259698Sdim  if (ISDOpcode == ISD::SUB)
1178259698Sdim    std::swap(SrcReg1, SrcReg2);
1179259698Sdim
1180259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
1181259698Sdim    .addReg(SrcReg1).addReg(SrcReg2);
1182259698Sdim  UpdateValueMap(I, ResultReg);
1183259698Sdim  return true;
1184259698Sdim}
1185259698Sdim
1186259698Sdim// Handle arguments to a call that we're attempting to fast-select.
1187259698Sdim// Return false if the arguments are too complex for us at the moment.
1188259698Sdimbool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1189259698Sdim                                  SmallVectorImpl<unsigned> &ArgRegs,
1190259698Sdim                                  SmallVectorImpl<MVT> &ArgVTs,
1191259698Sdim                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1192259698Sdim                                  SmallVectorImpl<unsigned> &RegArgs,
1193259698Sdim                                  CallingConv::ID CC,
1194259698Sdim                                  unsigned &NumBytes,
1195259698Sdim                                  bool IsVarArg) {
1196259698Sdim  SmallVector<CCValAssign, 16> ArgLocs;
1197259698Sdim  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
1198259698Sdim  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1199259698Sdim
1200259698Sdim  // Bail out if we can't handle any of the arguments.
1201259698Sdim  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1202259698Sdim    CCValAssign &VA = ArgLocs[I];
1203259698Sdim    MVT ArgVT = ArgVTs[VA.getValNo()];
1204259698Sdim
1205259698Sdim    // Skip vector arguments for now, as well as long double and
1206259698Sdim    // uint128_t, and anything that isn't passed in a register.
1207259698Sdim    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
1208259698Sdim        !VA.isRegLoc() || VA.needsCustom())
1209259698Sdim      return false;
1210259698Sdim
1211259698Sdim    // Skip bit-converted arguments for now.
1212259698Sdim    if (VA.getLocInfo() == CCValAssign::BCvt)
1213259698Sdim      return false;
1214259698Sdim  }
1215259698Sdim
1216259698Sdim  // Get a count of how many bytes are to be pushed onto the stack.
1217259698Sdim  NumBytes = CCInfo.getNextStackOffset();
1218259698Sdim
1219259698Sdim  // Issue CALLSEQ_START.
1220259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1221259698Sdim          TII.get(TII.getCallFrameSetupOpcode()))
1222259698Sdim    .addImm(NumBytes);
1223259698Sdim
1224259698Sdim  // Prepare to assign register arguments.  Every argument uses up a
1225259698Sdim  // GPR protocol register even if it's passed in a floating-point
1226259698Sdim  // register.
1227259698Sdim  unsigned NextGPR = PPC::X3;
1228259698Sdim  unsigned NextFPR = PPC::F1;
1229259698Sdim
1230259698Sdim  // Process arguments.
1231259698Sdim  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1232259698Sdim    CCValAssign &VA = ArgLocs[I];
1233259698Sdim    unsigned Arg = ArgRegs[VA.getValNo()];
1234259698Sdim    MVT ArgVT = ArgVTs[VA.getValNo()];
1235259698Sdim
1236259698Sdim    // Handle argument promotion and bitcasts.
1237259698Sdim    switch (VA.getLocInfo()) {
1238259698Sdim      default:
1239259698Sdim        llvm_unreachable("Unknown loc info!");
1240259698Sdim      case CCValAssign::Full:
1241259698Sdim        break;
1242259698Sdim      case CCValAssign::SExt: {
1243259698Sdim        MVT DestVT = VA.getLocVT();
1244259698Sdim        const TargetRegisterClass *RC =
1245259698Sdim          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1246259698Sdim        unsigned TmpReg = createResultReg(RC);
1247259698Sdim        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1248259698Sdim          llvm_unreachable("Failed to emit a sext!");
1249259698Sdim        ArgVT = DestVT;
1250259698Sdim        Arg = TmpReg;
1251259698Sdim        break;
1252259698Sdim      }
1253259698Sdim      case CCValAssign::AExt:
1254259698Sdim      case CCValAssign::ZExt: {
1255259698Sdim        MVT DestVT = VA.getLocVT();
1256259698Sdim        const TargetRegisterClass *RC =
1257259698Sdim          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1258259698Sdim        unsigned TmpReg = createResultReg(RC);
1259259698Sdim        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1260259698Sdim          llvm_unreachable("Failed to emit a zext!");
1261259698Sdim        ArgVT = DestVT;
1262259698Sdim        Arg = TmpReg;
1263259698Sdim        break;
1264259698Sdim      }
1265259698Sdim      case CCValAssign::BCvt: {
1266259698Sdim        // FIXME: Not yet handled.
1267259698Sdim        llvm_unreachable("Should have bailed before getting here!");
1268259698Sdim        break;
1269259698Sdim      }
1270259698Sdim    }
1271259698Sdim
1272259698Sdim    // Copy this argument to the appropriate register.
1273259698Sdim    unsigned ArgReg;
1274259698Sdim    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1275259698Sdim      ArgReg = NextFPR++;
1276259698Sdim      ++NextGPR;
1277259698Sdim    } else
1278259698Sdim      ArgReg = NextGPR++;
1279259698Sdim
1280259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1281259698Sdim            ArgReg).addReg(Arg);
1282259698Sdim    RegArgs.push_back(ArgReg);
1283259698Sdim  }
1284259698Sdim
1285259698Sdim  return true;
1286259698Sdim}
1287259698Sdim
1288259698Sdim// For a call that we've determined we can fast-select, finish the
1289259698Sdim// call sequence and generate a copy to obtain the return value (if any).
1290259698Sdimvoid PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
1291259698Sdim                             const Instruction *I, CallingConv::ID CC,
1292259698Sdim                             unsigned &NumBytes, bool IsVarArg) {
1293259698Sdim  // Issue CallSEQ_END.
1294259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1295259698Sdim          TII.get(TII.getCallFrameDestroyOpcode()))
1296259698Sdim    .addImm(NumBytes).addImm(0);
1297259698Sdim
1298259698Sdim  // Next, generate a copy to obtain the return value.
1299259698Sdim  // FIXME: No multi-register return values yet, though I don't foresee
1300259698Sdim  // any real difficulties there.
1301259698Sdim  if (RetVT != MVT::isVoid) {
1302259698Sdim    SmallVector<CCValAssign, 16> RVLocs;
1303259698Sdim    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
1304259698Sdim    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1305259698Sdim    CCValAssign &VA = RVLocs[0];
1306259698Sdim    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1307259698Sdim    assert(VA.isRegLoc() && "Can only return in registers!");
1308259698Sdim
1309259698Sdim    MVT DestVT = VA.getValVT();
1310259698Sdim    MVT CopyVT = DestVT;
1311259698Sdim
1312259698Sdim    // Ints smaller than a register still arrive in a full 64-bit
1313259698Sdim    // register, so make sure we recognize this.
1314259698Sdim    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1315259698Sdim      CopyVT = MVT::i64;
1316259698Sdim
1317259698Sdim    unsigned SourcePhysReg = VA.getLocReg();
1318259698Sdim    unsigned ResultReg = 0;
1319259698Sdim
1320259698Sdim    if (RetVT == CopyVT) {
1321259698Sdim      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1322259698Sdim      ResultReg = createResultReg(CpyRC);
1323259698Sdim
1324259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1325259698Sdim              TII.get(TargetOpcode::COPY), ResultReg)
1326259698Sdim        .addReg(SourcePhysReg);
1327259698Sdim
1328259698Sdim    // If necessary, round the floating result to single precision.
1329259698Sdim    } else if (CopyVT == MVT::f64) {
1330259698Sdim      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1331259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
1332259698Sdim              ResultReg).addReg(SourcePhysReg);
1333259698Sdim
1334259698Sdim    // If only the low half of a general register is needed, generate
1335259698Sdim    // a GPRC copy instead of a G8RC copy.  (EXTRACT_SUBREG can't be
1336259698Sdim    // used along the fast-isel path (not lowered), and downstream logic
1337259698Sdim    // also doesn't like a direct subreg copy on a physical reg.)
1338259698Sdim    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1339259698Sdim      ResultReg = createResultReg(&PPC::GPRCRegClass);
1340259698Sdim      // Convert physical register from G8RC to GPRC.
1341259698Sdim      SourcePhysReg -= PPC::X0 - PPC::R0;
1342259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1343259698Sdim              TII.get(TargetOpcode::COPY), ResultReg)
1344259698Sdim        .addReg(SourcePhysReg);
1345259698Sdim    }
1346259698Sdim
1347259698Sdim    assert(ResultReg && "ResultReg unset!");
1348259698Sdim    UsedRegs.push_back(SourcePhysReg);
1349259698Sdim    UpdateValueMap(I, ResultReg);
1350259698Sdim  }
1351259698Sdim}
1352259698Sdim
1353259698Sdim// Attempt to fast-select a call instruction.
1354259698Sdimbool PPCFastISel::SelectCall(const Instruction *I) {
1355259698Sdim  const CallInst *CI = cast<CallInst>(I);
1356259698Sdim  const Value *Callee = CI->getCalledValue();
1357259698Sdim
1358259698Sdim  // Can't handle inline asm.
1359259698Sdim  if (isa<InlineAsm>(Callee))
1360259698Sdim    return false;
1361259698Sdim
1362259698Sdim  // Allow SelectionDAG isel to handle tail calls.
1363259698Sdim  if (CI->isTailCall())
1364259698Sdim    return false;
1365259698Sdim
1366259698Sdim  // Obtain calling convention.
1367259698Sdim  ImmutableCallSite CS(CI);
1368259698Sdim  CallingConv::ID CC = CS.getCallingConv();
1369259698Sdim
1370259698Sdim  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1371259698Sdim  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1372259698Sdim  bool IsVarArg = FTy->isVarArg();
1373259698Sdim
1374259698Sdim  // Not ready for varargs yet.
1375259698Sdim  if (IsVarArg)
1376259698Sdim    return false;
1377259698Sdim
1378259698Sdim  // Handle simple calls for now, with legal return types and
1379259698Sdim  // those that can be extended.
1380259698Sdim  Type *RetTy = I->getType();
1381259698Sdim  MVT RetVT;
1382259698Sdim  if (RetTy->isVoidTy())
1383259698Sdim    RetVT = MVT::isVoid;
1384259698Sdim  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1385259698Sdim           RetVT != MVT::i8)
1386259698Sdim    return false;
1387259698Sdim
1388259698Sdim  // FIXME: No multi-register return values yet.
1389259698Sdim  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1390259698Sdim      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1391259698Sdim      RetVT != MVT::f64) {
1392259698Sdim    SmallVector<CCValAssign, 16> RVLocs;
1393259698Sdim    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
1394259698Sdim    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1395259698Sdim    if (RVLocs.size() > 1)
1396259698Sdim      return false;
1397259698Sdim  }
1398259698Sdim
1399259698Sdim  // Bail early if more than 8 arguments, as we only currently
1400259698Sdim  // handle arguments passed in registers.
1401259698Sdim  unsigned NumArgs = CS.arg_size();
1402259698Sdim  if (NumArgs > 8)
1403259698Sdim    return false;
1404259698Sdim
1405259698Sdim  // Set up the argument vectors.
1406259698Sdim  SmallVector<Value*, 8> Args;
1407259698Sdim  SmallVector<unsigned, 8> ArgRegs;
1408259698Sdim  SmallVector<MVT, 8> ArgVTs;
1409259698Sdim  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1410259698Sdim
1411259698Sdim  Args.reserve(NumArgs);
1412259698Sdim  ArgRegs.reserve(NumArgs);
1413259698Sdim  ArgVTs.reserve(NumArgs);
1414259698Sdim  ArgFlags.reserve(NumArgs);
1415259698Sdim
1416259698Sdim  for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
1417259698Sdim       II != IE; ++II) {
1418259698Sdim    // FIXME: ARM does something for intrinsic calls here, check into that.
1419259698Sdim
1420259698Sdim    unsigned AttrIdx = II - CS.arg_begin() + 1;
1421259698Sdim
1422259698Sdim    // Only handle easy calls for now.  It would be reasonably easy
1423259698Sdim    // to handle <= 8-byte structures passed ByVal in registers, but we
1424259698Sdim    // have to ensure they are right-justified in the register.
1425259698Sdim    if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
1426259698Sdim        CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
1427259698Sdim        CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
1428259698Sdim        CS.paramHasAttr(AttrIdx, Attribute::ByVal))
1429259698Sdim      return false;
1430259698Sdim
1431259698Sdim    ISD::ArgFlagsTy Flags;
1432259698Sdim    if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
1433259698Sdim      Flags.setSExt();
1434259698Sdim    if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
1435259698Sdim      Flags.setZExt();
1436259698Sdim
1437259698Sdim    Type *ArgTy = (*II)->getType();
1438259698Sdim    MVT ArgVT;
1439259698Sdim    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1440259698Sdim      return false;
1441259698Sdim
1442259698Sdim    if (ArgVT.isVector())
1443259698Sdim      return false;
1444259698Sdim
1445259698Sdim    unsigned Arg = getRegForValue(*II);
1446259698Sdim    if (Arg == 0)
1447259698Sdim      return false;
1448259698Sdim
1449259698Sdim    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1450259698Sdim    Flags.setOrigAlign(OriginalAlignment);
1451259698Sdim
1452259698Sdim    Args.push_back(*II);
1453259698Sdim    ArgRegs.push_back(Arg);
1454259698Sdim    ArgVTs.push_back(ArgVT);
1455259698Sdim    ArgFlags.push_back(Flags);
1456259698Sdim  }
1457259698Sdim
1458259698Sdim  // Process the arguments.
1459259698Sdim  SmallVector<unsigned, 8> RegArgs;
1460259698Sdim  unsigned NumBytes;
1461259698Sdim
1462259698Sdim  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1463259698Sdim                       RegArgs, CC, NumBytes, IsVarArg))
1464259698Sdim    return false;
1465259698Sdim
1466259698Sdim  // FIXME: No handling for function pointers yet.  This requires
1467259698Sdim  // implementing the function descriptor (OPD) setup.
1468259698Sdim  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1469259698Sdim  if (!GV)
1470259698Sdim    return false;
1471259698Sdim
1472259698Sdim  // Build direct call with NOP for TOC restore.
1473259698Sdim  // FIXME: We can and should optimize away the NOP for local calls.
1474259698Sdim  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1475259698Sdim                                    TII.get(PPC::BL8_NOP));
1476259698Sdim  // Add callee.
1477259698Sdim  MIB.addGlobalAddress(GV);
1478259698Sdim
1479259698Sdim  // Add implicit physical register uses to the call.
1480259698Sdim  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1481259698Sdim    MIB.addReg(RegArgs[II], RegState::Implicit);
1482259698Sdim
1483259698Sdim  // Add a register mask with the call-preserved registers.  Proper
1484259698Sdim  // defs for return values will be added by setPhysRegsDeadExcept().
1485259698Sdim  MIB.addRegMask(TRI.getCallPreservedMask(CC));
1486259698Sdim
1487259698Sdim  // Finish off the call including any return values.
1488259698Sdim  SmallVector<unsigned, 4> UsedRegs;
1489259698Sdim  finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
1490259698Sdim
1491259698Sdim  // Set all unused physregs defs as dead.
1492259698Sdim  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1493259698Sdim
1494259698Sdim  return true;
1495259698Sdim}
1496259698Sdim
1497259698Sdim// Attempt to fast-select a return instruction.
1498259698Sdimbool PPCFastISel::SelectRet(const Instruction *I) {
1499259698Sdim
1500259698Sdim  if (!FuncInfo.CanLowerReturn)
1501259698Sdim    return false;
1502259698Sdim
1503259698Sdim  const ReturnInst *Ret = cast<ReturnInst>(I);
1504259698Sdim  const Function &F = *I->getParent()->getParent();
1505259698Sdim
1506259698Sdim  // Build a list of return value registers.
1507259698Sdim  SmallVector<unsigned, 4> RetRegs;
1508259698Sdim  CallingConv::ID CC = F.getCallingConv();
1509259698Sdim
1510259698Sdim  if (Ret->getNumOperands() > 0) {
1511259698Sdim    SmallVector<ISD::OutputArg, 4> Outs;
1512259698Sdim    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
1513259698Sdim
1514259698Sdim    // Analyze operands of the call, assigning locations to each operand.
1515259698Sdim    SmallVector<CCValAssign, 16> ValLocs;
1516259698Sdim    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
1517259698Sdim    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1518259698Sdim    const Value *RV = Ret->getOperand(0);
1519259698Sdim
1520259698Sdim    // FIXME: Only one output register for now.
1521259698Sdim    if (ValLocs.size() > 1)
1522259698Sdim      return false;
1523259698Sdim
1524259698Sdim    // Special case for returning a constant integer of any size.
1525259698Sdim    // Materialize the constant as an i64 and copy it to the return
1526259698Sdim    // register.  This avoids an unnecessary extend or truncate.
1527259698Sdim    if (isa<ConstantInt>(*RV)) {
1528259698Sdim      const Constant *C = cast<Constant>(RV);
1529259698Sdim      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
1530259698Sdim      unsigned RetReg = ValLocs[0].getLocReg();
1531259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1532259698Sdim              RetReg).addReg(SrcReg);
1533259698Sdim      RetRegs.push_back(RetReg);
1534259698Sdim
1535259698Sdim    } else {
1536259698Sdim      unsigned Reg = getRegForValue(RV);
1537259698Sdim
1538259698Sdim      if (Reg == 0)
1539259698Sdim        return false;
1540259698Sdim
1541259698Sdim      // Copy the result values into the output registers.
1542259698Sdim      for (unsigned i = 0; i < ValLocs.size(); ++i) {
1543259698Sdim
1544259698Sdim        CCValAssign &VA = ValLocs[i];
1545259698Sdim        assert(VA.isRegLoc() && "Can only return in registers!");
1546259698Sdim        RetRegs.push_back(VA.getLocReg());
1547259698Sdim        unsigned SrcReg = Reg + VA.getValNo();
1548259698Sdim
1549259698Sdim        EVT RVEVT = TLI.getValueType(RV->getType());
1550259698Sdim        if (!RVEVT.isSimple())
1551259698Sdim          return false;
1552259698Sdim        MVT RVVT = RVEVT.getSimpleVT();
1553259698Sdim        MVT DestVT = VA.getLocVT();
1554259698Sdim
1555259698Sdim        if (RVVT != DestVT && RVVT != MVT::i8 &&
1556259698Sdim            RVVT != MVT::i16 && RVVT != MVT::i32)
1557259698Sdim          return false;
1558259698Sdim
1559259698Sdim        if (RVVT != DestVT) {
1560259698Sdim          switch (VA.getLocInfo()) {
1561259698Sdim            default:
1562259698Sdim              llvm_unreachable("Unknown loc info!");
1563259698Sdim            case CCValAssign::Full:
1564259698Sdim              llvm_unreachable("Full value assign but types don't match?");
1565259698Sdim            case CCValAssign::AExt:
1566259698Sdim            case CCValAssign::ZExt: {
1567259698Sdim              const TargetRegisterClass *RC =
1568259698Sdim                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1569259698Sdim              unsigned TmpReg = createResultReg(RC);
1570259698Sdim              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1571259698Sdim                return false;
1572259698Sdim              SrcReg = TmpReg;
1573259698Sdim              break;
1574259698Sdim            }
1575259698Sdim            case CCValAssign::SExt: {
1576259698Sdim              const TargetRegisterClass *RC =
1577259698Sdim                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1578259698Sdim              unsigned TmpReg = createResultReg(RC);
1579259698Sdim              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1580259698Sdim                return false;
1581259698Sdim              SrcReg = TmpReg;
1582259698Sdim              break;
1583259698Sdim            }
1584259698Sdim          }
1585259698Sdim        }
1586259698Sdim
1587259698Sdim        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1588259698Sdim                TII.get(TargetOpcode::COPY), RetRegs[i])
1589259698Sdim          .addReg(SrcReg);
1590259698Sdim      }
1591259698Sdim    }
1592259698Sdim  }
1593259698Sdim
1594259698Sdim  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1595259698Sdim                                    TII.get(PPC::BLR));
1596259698Sdim
1597259698Sdim  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1598259698Sdim    MIB.addReg(RetRegs[i], RegState::Implicit);
1599259698Sdim
1600259698Sdim  return true;
1601259698Sdim}
1602259698Sdim
1603259698Sdim// Attempt to emit an integer extend of SrcReg into DestReg.  Both
1604259698Sdim// signed and zero extensions are supported.  Return false if we
1605259698Sdim// can't handle it.
1606259698Sdimbool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1607259698Sdim                                unsigned DestReg, bool IsZExt) {
1608259698Sdim  if (DestVT != MVT::i32 && DestVT != MVT::i64)
1609259698Sdim    return false;
1610259698Sdim  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1611259698Sdim    return false;
1612259698Sdim
1613259698Sdim  // Signed extensions use EXTSB, EXTSH, EXTSW.
1614259698Sdim  if (!IsZExt) {
1615259698Sdim    unsigned Opc;
1616259698Sdim    if (SrcVT == MVT::i8)
1617259698Sdim      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1618259698Sdim    else if (SrcVT == MVT::i16)
1619259698Sdim      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1620259698Sdim    else {
1621259698Sdim      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1622259698Sdim      Opc = PPC::EXTSW_32_64;
1623259698Sdim    }
1624259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
1625259698Sdim      .addReg(SrcReg);
1626259698Sdim
1627259698Sdim  // Unsigned 32-bit extensions use RLWINM.
1628259698Sdim  } else if (DestVT == MVT::i32) {
1629259698Sdim    unsigned MB;
1630259698Sdim    if (SrcVT == MVT::i8)
1631259698Sdim      MB = 24;
1632259698Sdim    else {
1633259698Sdim      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1634259698Sdim      MB = 16;
1635259698Sdim    }
1636259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
1637259698Sdim            DestReg)
1638259698Sdim      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1639259698Sdim
1640259698Sdim  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1641259698Sdim  } else {
1642259698Sdim    unsigned MB;
1643259698Sdim    if (SrcVT == MVT::i8)
1644259698Sdim      MB = 56;
1645259698Sdim    else if (SrcVT == MVT::i16)
1646259698Sdim      MB = 48;
1647259698Sdim    else
1648259698Sdim      MB = 32;
1649259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1650259698Sdim            TII.get(PPC::RLDICL_32_64), DestReg)
1651259698Sdim      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1652259698Sdim  }
1653259698Sdim
1654259698Sdim  return true;
1655259698Sdim}
1656259698Sdim
1657259698Sdim// Attempt to fast-select an indirect branch instruction.
1658259698Sdimbool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1659259698Sdim  unsigned AddrReg = getRegForValue(I->getOperand(0));
1660259698Sdim  if (AddrReg == 0)
1661259698Sdim    return false;
1662259698Sdim
1663259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
1664259698Sdim    .addReg(AddrReg);
1665259698Sdim  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
1666259698Sdim
1667259698Sdim  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1668259698Sdim  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
1669259698Sdim    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
1670259698Sdim
1671259698Sdim  return true;
1672259698Sdim}
1673259698Sdim
1674259698Sdim// Attempt to fast-select an integer truncate instruction.
1675259698Sdimbool PPCFastISel::SelectTrunc(const Instruction *I) {
1676259698Sdim  Value *Src  = I->getOperand(0);
1677259698Sdim  EVT SrcVT  = TLI.getValueType(Src->getType(), true);
1678259698Sdim  EVT DestVT = TLI.getValueType(I->getType(), true);
1679259698Sdim
1680259698Sdim  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1681259698Sdim    return false;
1682259698Sdim
1683259698Sdim  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1684259698Sdim    return false;
1685259698Sdim
1686259698Sdim  unsigned SrcReg = getRegForValue(Src);
1687259698Sdim  if (!SrcReg)
1688259698Sdim    return false;
1689259698Sdim
1690259698Sdim  // The only interesting case is when we need to switch register classes.
1691259698Sdim  if (SrcVT == MVT::i64) {
1692259698Sdim    unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1693259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1694259698Sdim            ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1695259698Sdim    SrcReg = ResultReg;
1696259698Sdim  }
1697259698Sdim
1698259698Sdim  UpdateValueMap(I, SrcReg);
1699259698Sdim  return true;
1700259698Sdim}
1701259698Sdim
1702259698Sdim// Attempt to fast-select an integer extend instruction.
1703259698Sdimbool PPCFastISel::SelectIntExt(const Instruction *I) {
1704259698Sdim  Type *DestTy = I->getType();
1705259698Sdim  Value *Src = I->getOperand(0);
1706259698Sdim  Type *SrcTy = Src->getType();
1707259698Sdim
1708259698Sdim  bool IsZExt = isa<ZExtInst>(I);
1709259698Sdim  unsigned SrcReg = getRegForValue(Src);
1710259698Sdim  if (!SrcReg) return false;
1711259698Sdim
1712259698Sdim  EVT SrcEVT, DestEVT;
1713259698Sdim  SrcEVT = TLI.getValueType(SrcTy, true);
1714259698Sdim  DestEVT = TLI.getValueType(DestTy, true);
1715259698Sdim  if (!SrcEVT.isSimple())
1716259698Sdim    return false;
1717259698Sdim  if (!DestEVT.isSimple())
1718259698Sdim    return false;
1719259698Sdim
1720259698Sdim  MVT SrcVT = SrcEVT.getSimpleVT();
1721259698Sdim  MVT DestVT = DestEVT.getSimpleVT();
1722259698Sdim
1723259698Sdim  // If we know the register class needed for the result of this
1724259698Sdim  // instruction, use it.  Otherwise pick the register class of the
1725259698Sdim  // correct size that does not contain X0/R0, since we don't know
1726259698Sdim  // whether downstream uses permit that assignment.
1727259698Sdim  unsigned AssignedReg = FuncInfo.ValueMap[I];
1728259698Sdim  const TargetRegisterClass *RC =
1729259698Sdim    (AssignedReg ? MRI.getRegClass(AssignedReg) :
1730259698Sdim     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1731259698Sdim      &PPC::GPRC_and_GPRC_NOR0RegClass));
1732259698Sdim  unsigned ResultReg = createResultReg(RC);
1733259698Sdim
1734259698Sdim  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1735259698Sdim    return false;
1736259698Sdim
1737259698Sdim  UpdateValueMap(I, ResultReg);
1738259698Sdim  return true;
1739259698Sdim}
1740259698Sdim
1741259698Sdim// Attempt to fast-select an instruction that wasn't handled by
1742259698Sdim// the table-generated machinery.
1743259698Sdimbool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
1744259698Sdim
1745259698Sdim  switch (I->getOpcode()) {
1746259698Sdim    case Instruction::Load:
1747259698Sdim      return SelectLoad(I);
1748259698Sdim    case Instruction::Store:
1749259698Sdim      return SelectStore(I);
1750259698Sdim    case Instruction::Br:
1751259698Sdim      return SelectBranch(I);
1752259698Sdim    case Instruction::IndirectBr:
1753259698Sdim      return SelectIndirectBr(I);
1754259698Sdim    case Instruction::FPExt:
1755259698Sdim      return SelectFPExt(I);
1756259698Sdim    case Instruction::FPTrunc:
1757259698Sdim      return SelectFPTrunc(I);
1758259698Sdim    case Instruction::SIToFP:
1759259698Sdim      return SelectIToFP(I, /*IsSigned*/ true);
1760259698Sdim    case Instruction::UIToFP:
1761259698Sdim      return SelectIToFP(I, /*IsSigned*/ false);
1762259698Sdim    case Instruction::FPToSI:
1763259698Sdim      return SelectFPToI(I, /*IsSigned*/ true);
1764259698Sdim    case Instruction::FPToUI:
1765259698Sdim      return SelectFPToI(I, /*IsSigned*/ false);
1766259698Sdim    case Instruction::Add:
1767259698Sdim      return SelectBinaryIntOp(I, ISD::ADD);
1768259698Sdim    case Instruction::Or:
1769259698Sdim      return SelectBinaryIntOp(I, ISD::OR);
1770259698Sdim    case Instruction::Sub:
1771259698Sdim      return SelectBinaryIntOp(I, ISD::SUB);
1772259698Sdim    case Instruction::Call:
1773259698Sdim      if (dyn_cast<IntrinsicInst>(I))
1774259698Sdim        return false;
1775259698Sdim      return SelectCall(I);
1776259698Sdim    case Instruction::Ret:
1777259698Sdim      return SelectRet(I);
1778259698Sdim    case Instruction::Trunc:
1779259698Sdim      return SelectTrunc(I);
1780259698Sdim    case Instruction::ZExt:
1781259698Sdim    case Instruction::SExt:
1782259698Sdim      return SelectIntExt(I);
1783259698Sdim    // Here add other flavors of Instruction::XXX that automated
1784259698Sdim    // cases don't catch.  For example, switches are terminators
1785259698Sdim    // that aren't yet handled.
1786259698Sdim    default:
1787259698Sdim      break;
1788259698Sdim  }
1789259698Sdim  return false;
1790259698Sdim}
1791259698Sdim
1792259698Sdim// Materialize a floating-point constant into a register, and return
1793259698Sdim// the register number (or zero if we failed to handle it).
1794259698Sdimunsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1795259698Sdim  // No plans to handle long double here.
1796259698Sdim  if (VT != MVT::f32 && VT != MVT::f64)
1797259698Sdim    return 0;
1798259698Sdim
1799259698Sdim  // All FP constants are loaded from the constant pool.
1800259698Sdim  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
1801259698Sdim  assert(Align > 0 && "Unexpectedly missing alignment information!");
1802259698Sdim  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1803259698Sdim  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
1804259698Sdim  CodeModel::Model CModel = TM.getCodeModel();
1805259698Sdim
1806259698Sdim  MachineMemOperand *MMO =
1807259698Sdim    FuncInfo.MF->getMachineMemOperand(
1808259698Sdim      MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
1809259698Sdim      (VT == MVT::f32) ? 4 : 8, Align);
1810259698Sdim
1811259698Sdim  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1812259698Sdim  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1813259698Sdim
1814259698Sdim  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1815259698Sdim  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
1816259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
1817259698Sdim            TmpReg)
1818259698Sdim      .addConstantPoolIndex(Idx).addReg(PPC::X2);
1819259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
1820259698Sdim      .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1821259698Sdim  } else {
1822259698Sdim    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1823259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
1824259698Sdim            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1825259698Sdim    // But for large code model, we must generate a LDtocL followed
1826259698Sdim    // by the LF[SD].
1827259698Sdim    if (CModel == CodeModel::Large) {
1828259698Sdim      unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1829259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
1830259698Sdim              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1831259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
1832259698Sdim        .addImm(0).addReg(TmpReg2);
1833259698Sdim    } else
1834259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
1835259698Sdim        .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1836259698Sdim        .addReg(TmpReg)
1837259698Sdim        .addMemOperand(MMO);
1838259698Sdim  }
1839259698Sdim
1840259698Sdim  return DestReg;
1841259698Sdim}
1842259698Sdim
1843259698Sdim// Materialize the address of a global value into a register, and return
1844259698Sdim// the register number (or zero if we failed to handle it).
1845259698Sdimunsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
1846259698Sdim  assert(VT == MVT::i64 && "Non-address!");
1847259698Sdim  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
1848259698Sdim  unsigned DestReg = createResultReg(RC);
1849259698Sdim
1850259698Sdim  // Global values may be plain old object addresses, TLS object
1851259698Sdim  // addresses, constant pool entries, or jump tables.  How we generate
1852259698Sdim  // code for these may depend on small, medium, or large code model.
1853259698Sdim  CodeModel::Model CModel = TM.getCodeModel();
1854259698Sdim
1855259698Sdim  // FIXME: Jump tables are not yet required because fast-isel doesn't
1856259698Sdim  // handle switches; if that changes, we need them as well.  For now,
1857259698Sdim  // what follows assumes everything's a generic (or TLS) global address.
1858259698Sdim  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
1859259698Sdim  if (!GVar) {
1860259698Sdim    // If GV is an alias, use the aliasee for determining thread-locality.
1861259698Sdim    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
1862259698Sdim      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
1863259698Sdim  }
1864259698Sdim
1865259698Sdim  // FIXME: We don't yet handle the complexity of TLS.
1866259698Sdim  bool IsTLS = GVar && GVar->isThreadLocal();
1867259698Sdim  if (IsTLS)
1868259698Sdim    return 0;
1869259698Sdim
1870259698Sdim  // For small code model, generate a simple TOC load.
1871259698Sdim  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
1872259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
1873259698Sdim      .addGlobalAddress(GV).addReg(PPC::X2);
1874259698Sdim  else {
1875259698Sdim    // If the address is an externally defined symbol, a symbol with
1876259698Sdim    // common or externally available linkage, a function address, or a
1877259698Sdim    // jump table address (not yet needed), or if we are generating code
1878259698Sdim    // for large code model, we generate:
1879259698Sdim    //       LDtocL(GV, ADDIStocHA(%X2, GV))
1880259698Sdim    // Otherwise we generate:
1881259698Sdim    //       ADDItocL(ADDIStocHA(%X2, GV), GV)
1882259698Sdim    // Either way, start with the ADDIStocHA:
1883259698Sdim    unsigned HighPartReg = createResultReg(RC);
1884259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
1885259698Sdim            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
1886259698Sdim
1887259698Sdim    // !GVar implies a function address.  An external variable is one
1888259698Sdim    // without an initializer.
1889259698Sdim    // If/when switches are implemented, jump tables should be handled
1890259698Sdim    // on the "if" path here.
1891259698Sdim    if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
1892259698Sdim        GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
1893259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
1894259698Sdim              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
1895259698Sdim    else
1896259698Sdim      // Otherwise generate the ADDItocL.
1897259698Sdim      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
1898259698Sdim              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
1899259698Sdim  }
1900259698Sdim
1901259698Sdim  return DestReg;
1902259698Sdim}
1903259698Sdim
1904259698Sdim// Materialize a 32-bit integer constant into a register, and return
1905259698Sdim// the register number (or zero if we failed to handle it).
1906259698Sdimunsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
1907259698Sdim                                             const TargetRegisterClass *RC) {
1908259698Sdim  unsigned Lo = Imm & 0xFFFF;
1909259698Sdim  unsigned Hi = (Imm >> 16) & 0xFFFF;
1910259698Sdim
1911259698Sdim  unsigned ResultReg = createResultReg(RC);
1912259698Sdim  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1913259698Sdim
1914259698Sdim  if (isInt<16>(Imm))
1915259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1916259698Sdim            TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
1917259698Sdim      .addImm(Imm);
1918259698Sdim  else if (Lo) {
1919259698Sdim    // Both Lo and Hi have nonzero bits.
1920259698Sdim    unsigned TmpReg = createResultReg(RC);
1921259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1922259698Sdim            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
1923259698Sdim      .addImm(Hi);
1924259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1925259698Sdim            TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
1926259698Sdim      .addReg(TmpReg).addImm(Lo);
1927259698Sdim  } else
1928259698Sdim    // Just Hi bits.
1929259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1930259698Sdim            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
1931259698Sdim      .addImm(Hi);
1932259698Sdim
1933259698Sdim  return ResultReg;
1934259698Sdim}
1935259698Sdim
1936259698Sdim// Materialize a 64-bit integer constant into a register, and return
1937259698Sdim// the register number (or zero if we failed to handle it).
1938259698Sdimunsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
1939259698Sdim                                             const TargetRegisterClass *RC) {
1940259698Sdim  unsigned Remainder = 0;
1941259698Sdim  unsigned Shift = 0;
1942259698Sdim
1943259698Sdim  // If the value doesn't fit in 32 bits, see if we can shift it
1944259698Sdim  // so that it fits in 32 bits.
1945259698Sdim  if (!isInt<32>(Imm)) {
1946259698Sdim    Shift = countTrailingZeros<uint64_t>(Imm);
1947259698Sdim    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
1948259698Sdim
1949259698Sdim    if (isInt<32>(ImmSh))
1950259698Sdim      Imm = ImmSh;
1951259698Sdim    else {
1952259698Sdim      Remainder = Imm;
1953259698Sdim      Shift = 32;
1954259698Sdim      Imm >>= 32;
1955259698Sdim    }
1956259698Sdim  }
1957259698Sdim
1958259698Sdim  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
1959259698Sdim  // (if not shifted).
1960259698Sdim  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
1961259698Sdim  if (!Shift)
1962259698Sdim    return TmpReg1;
1963259698Sdim
1964259698Sdim  // If upper 32 bits were not zero, we've built them and need to shift
1965259698Sdim  // them into place.
1966259698Sdim  unsigned TmpReg2;
1967259698Sdim  if (Imm) {
1968259698Sdim    TmpReg2 = createResultReg(RC);
1969259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
1970259698Sdim            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
1971259698Sdim  } else
1972259698Sdim    TmpReg2 = TmpReg1;
1973259698Sdim
1974259698Sdim  unsigned TmpReg3, Hi, Lo;
1975259698Sdim  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
1976259698Sdim    TmpReg3 = createResultReg(RC);
1977259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
1978259698Sdim            TmpReg3).addReg(TmpReg2).addImm(Hi);
1979259698Sdim  } else
1980259698Sdim    TmpReg3 = TmpReg2;
1981259698Sdim
1982259698Sdim  if ((Lo = Remainder & 0xFFFF)) {
1983259698Sdim    unsigned ResultReg = createResultReg(RC);
1984259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
1985259698Sdim            ResultReg).addReg(TmpReg3).addImm(Lo);
1986259698Sdim    return ResultReg;
1987259698Sdim  }
1988259698Sdim
1989259698Sdim  return TmpReg3;
1990259698Sdim}
1991259698Sdim
1992259698Sdim
1993259698Sdim// Materialize an integer constant into a register, and return
1994259698Sdim// the register number (or zero if we failed to handle it).
1995259698Sdimunsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
1996259698Sdim
1997259698Sdim  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
1998259698Sdim      VT != MVT::i8 && VT != MVT::i1)
1999259698Sdim    return 0;
2000259698Sdim
2001259698Sdim  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2002259698Sdim                                   &PPC::GPRCRegClass);
2003259698Sdim
2004259698Sdim  // If the constant is in range, use a load-immediate.
2005259698Sdim  const ConstantInt *CI = cast<ConstantInt>(C);
2006259698Sdim  if (isInt<16>(CI->getSExtValue())) {
2007259698Sdim    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2008259698Sdim    unsigned ImmReg = createResultReg(RC);
2009259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
2010259698Sdim      .addImm(CI->getSExtValue());
2011259698Sdim    return ImmReg;
2012259698Sdim  }
2013259698Sdim
2014259698Sdim  // Construct the constant piecewise.
2015259698Sdim  int64_t Imm = CI->getZExtValue();
2016259698Sdim
2017259698Sdim  if (VT == MVT::i64)
2018259698Sdim    return PPCMaterialize64BitInt(Imm, RC);
2019259698Sdim  else if (VT == MVT::i32)
2020259698Sdim    return PPCMaterialize32BitInt(Imm, RC);
2021259698Sdim
2022259698Sdim  return 0;
2023259698Sdim}
2024259698Sdim
2025259698Sdim// Materialize a constant into a register, and return the register
2026259698Sdim// number (or zero if we failed to handle it).
2027259698Sdimunsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
2028259698Sdim  EVT CEVT = TLI.getValueType(C->getType(), true);
2029259698Sdim
2030259698Sdim  // Only handle simple types.
2031259698Sdim  if (!CEVT.isSimple()) return 0;
2032259698Sdim  MVT VT = CEVT.getSimpleVT();
2033259698Sdim
2034259698Sdim  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2035259698Sdim    return PPCMaterializeFP(CFP, VT);
2036259698Sdim  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2037259698Sdim    return PPCMaterializeGV(GV, VT);
2038259698Sdim  else if (isa<ConstantInt>(C))
2039259698Sdim    return PPCMaterializeInt(C, VT);
2040259698Sdim
2041259698Sdim  return 0;
2042259698Sdim}
2043259698Sdim
2044259698Sdim// Materialize the address created by an alloca into a register, and
2045259698Sdim// return the register number (or zero if we failed to handle it).
2046259698Sdimunsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
2047259698Sdim  // Don't handle dynamic allocas.
2048259698Sdim  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2049259698Sdim
2050259698Sdim  MVT VT;
2051259698Sdim  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2052259698Sdim
2053259698Sdim  DenseMap<const AllocaInst*, int>::iterator SI =
2054259698Sdim    FuncInfo.StaticAllocaMap.find(AI);
2055259698Sdim
2056259698Sdim  if (SI != FuncInfo.StaticAllocaMap.end()) {
2057259698Sdim    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2058259698Sdim    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
2059259698Sdim            ResultReg).addFrameIndex(SI->second).addImm(0);
2060259698Sdim    return ResultReg;
2061259698Sdim  }
2062259698Sdim
2063259698Sdim  return 0;
2064259698Sdim}
2065259698Sdim
2066259698Sdim// Fold loads into extends when possible.
2067259698Sdim// FIXME: We can have multiple redundant extend/trunc instructions
2068259698Sdim// following a load.  The folding only picks up one.  Extend this
2069259698Sdim// to check subsequent instructions for the same pattern and remove
2070259698Sdim// them.  Thus ResultReg should be the def reg for the last redundant
2071259698Sdim// instruction in a chain, and all intervening instructions can be
2072259698Sdim// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
2073259698Sdim// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2074259698Sdimbool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2075259698Sdim                                      const LoadInst *LI) {
2076259698Sdim  // Verify we have a legal type before going any further.
2077259698Sdim  MVT VT;
2078259698Sdim  if (!isLoadTypeLegal(LI->getType(), VT))
2079259698Sdim    return false;
2080259698Sdim
2081259698Sdim  // Combine load followed by zero- or sign-extend.
2082259698Sdim  bool IsZExt = false;
2083259698Sdim  switch(MI->getOpcode()) {
2084259698Sdim    default:
2085259698Sdim      return false;
2086259698Sdim
2087259698Sdim    case PPC::RLDICL:
2088259698Sdim    case PPC::RLDICL_32_64: {
2089259698Sdim      IsZExt = true;
2090259698Sdim      unsigned MB = MI->getOperand(3).getImm();
2091259698Sdim      if ((VT == MVT::i8 && MB <= 56) ||
2092259698Sdim          (VT == MVT::i16 && MB <= 48) ||
2093259698Sdim          (VT == MVT::i32 && MB <= 32))
2094259698Sdim        break;
2095259698Sdim      return false;
2096259698Sdim    }
2097259698Sdim
2098259698Sdim    case PPC::RLWINM:
2099259698Sdim    case PPC::RLWINM8: {
2100259698Sdim      IsZExt = true;
2101259698Sdim      unsigned MB = MI->getOperand(3).getImm();
2102259698Sdim      if ((VT == MVT::i8 && MB <= 24) ||
2103259698Sdim          (VT == MVT::i16 && MB <= 16))
2104259698Sdim        break;
2105259698Sdim      return false;
2106259698Sdim    }
2107259698Sdim
2108259698Sdim    case PPC::EXTSB:
2109259698Sdim    case PPC::EXTSB8:
2110259698Sdim    case PPC::EXTSB8_32_64:
2111259698Sdim      /* There is no sign-extending load-byte instruction. */
2112259698Sdim      return false;
2113259698Sdim
2114259698Sdim    case PPC::EXTSH:
2115259698Sdim    case PPC::EXTSH8:
2116259698Sdim    case PPC::EXTSH8_32_64: {
2117259698Sdim      if (VT != MVT::i16 && VT != MVT::i8)
2118259698Sdim        return false;
2119259698Sdim      break;
2120259698Sdim    }
2121259698Sdim
2122259698Sdim    case PPC::EXTSW:
2123259698Sdim    case PPC::EXTSW_32_64: {
2124259698Sdim      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2125259698Sdim        return false;
2126259698Sdim      break;
2127259698Sdim    }
2128259698Sdim  }
2129259698Sdim
2130259698Sdim  // See if we can handle this address.
2131259698Sdim  Address Addr;
2132259698Sdim  if (!PPCComputeAddress(LI->getOperand(0), Addr))
2133259698Sdim    return false;
2134259698Sdim
2135259698Sdim  unsigned ResultReg = MI->getOperand(0).getReg();
2136259698Sdim
2137259698Sdim  if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
2138259698Sdim    return false;
2139259698Sdim
2140259698Sdim  MI->eraseFromParent();
2141259698Sdim  return true;
2142259698Sdim}
2143259698Sdim
2144259698Sdim// Attempt to lower call arguments in a faster way than done by
2145259698Sdim// the selection DAG code.
2146259698Sdimbool PPCFastISel::FastLowerArguments() {
2147259698Sdim  // Defer to normal argument lowering for now.  It's reasonably
2148259698Sdim  // efficient.  Consider doing something like ARM to handle the
2149259698Sdim  // case where all args fit in registers, no varargs, no float
2150259698Sdim  // or vector args.
2151259698Sdim  return false;
2152259698Sdim}
2153259698Sdim
2154259698Sdim// Handle materializing integer constants into a register.  This is not
2155259698Sdim// automatically generated for PowerPC, so must be explicitly created here.
2156259698Sdimunsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2157259698Sdim
2158259698Sdim  if (Opc != ISD::Constant)
2159259698Sdim    return 0;
2160259698Sdim
2161259698Sdim  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
2162259698Sdim      VT != MVT::i8 && VT != MVT::i1)
2163259698Sdim    return 0;
2164259698Sdim
2165259698Sdim  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2166259698Sdim                                   &PPC::GPRCRegClass);
2167259698Sdim  if (VT == MVT::i64)
2168259698Sdim    return PPCMaterialize64BitInt(Imm, RC);
2169259698Sdim  else
2170259698Sdim    return PPCMaterialize32BitInt(Imm, RC);
2171259698Sdim}
2172259698Sdim
2173259698Sdim// Override for ADDI and ADDI8 to set the correct register class
2174259698Sdim// on RHS operand 0.  The automatic infrastructure naively assumes
2175259698Sdim// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2176259698Sdim// for these cases.  At the moment, none of the other automatically
2177259698Sdim// generated RI instructions require special treatment.  However, once
2178259698Sdim// SelectSelect is implemented, "isel" requires similar handling.
2179259698Sdim//
2180259698Sdim// Also be conservative about the output register class.  Avoid
2181259698Sdim// assigning R0 or X0 to the output register for GPRC and G8RC
2182259698Sdim// register classes, as any such result could be used in ADDI, etc.,
2183259698Sdim// where those regs have another meaning.
2184259698Sdimunsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
2185259698Sdim                                      const TargetRegisterClass *RC,
2186259698Sdim                                      unsigned Op0, bool Op0IsKill,
2187259698Sdim                                      uint64_t Imm) {
2188259698Sdim  if (MachineInstOpcode == PPC::ADDI)
2189259698Sdim    MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2190259698Sdim  else if (MachineInstOpcode == PPC::ADDI8)
2191259698Sdim    MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2192259698Sdim
2193259698Sdim  const TargetRegisterClass *UseRC =
2194259698Sdim    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2195259698Sdim     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2196259698Sdim
2197259698Sdim  return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
2198259698Sdim                                   Op0, Op0IsKill, Imm);
2199259698Sdim}
2200259698Sdim
2201259698Sdim// Override for instructions with one register operand to avoid use of
2202259698Sdim// R0/X0.  The automatic infrastructure isn't aware of the context so
2203259698Sdim// we must be conservative.
2204259698Sdimunsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
2205259698Sdim                                     const TargetRegisterClass* RC,
2206259698Sdim                                     unsigned Op0, bool Op0IsKill) {
2207259698Sdim  const TargetRegisterClass *UseRC =
2208259698Sdim    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2209259698Sdim     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2210259698Sdim
2211259698Sdim  return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2212259698Sdim}
2213259698Sdim
2214259698Sdim// Override for instructions with two register operands to avoid use
2215259698Sdim// of R0/X0.  The automatic infrastructure isn't aware of the context
2216259698Sdim// so we must be conservative.
2217259698Sdimunsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
2218259698Sdim                                      const TargetRegisterClass* RC,
2219259698Sdim                                      unsigned Op0, bool Op0IsKill,
2220259698Sdim                                      unsigned Op1, bool Op1IsKill) {
2221259698Sdim  const TargetRegisterClass *UseRC =
2222259698Sdim    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2223259698Sdim     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2224259698Sdim
2225259698Sdim  return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2226259698Sdim                                   Op1, Op1IsKill);
2227259698Sdim}
2228259698Sdim
2229259698Sdimnamespace llvm {
2230259698Sdim  // Create the fast instruction selector for PowerPC64 ELF.
2231259698Sdim  FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2232259698Sdim                                const TargetLibraryInfo *LibInfo) {
2233259698Sdim    const TargetMachine &TM = FuncInfo.MF->getTarget();
2234259698Sdim
2235259698Sdim    // Only available on 64-bit ELF for now.
2236259698Sdim    const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
2237259698Sdim    if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
2238259698Sdim      return new PPCFastISel(FuncInfo, LibInfo);
2239259698Sdim
2240259698Sdim    return 0;
2241259698Sdim  }
2242259698Sdim}
2243