1234353Sdim//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// 2198090Srdivacky// 3198090Srdivacky// The LLVM Compiler Infrastructure 4198090Srdivacky// 5198090Srdivacky// This file is distributed under the University of Illinois Open Source 6198090Srdivacky// License. See LICENSE.TXT for details. 7198090Srdivacky// 8198090Srdivacky//===----------------------------------------------------------------------===// 9198090Srdivacky// 10198090Srdivacky// This file contains the Base ARM implementation of the TargetInstrInfo class. 11198090Srdivacky// 12198090Srdivacky//===----------------------------------------------------------------------===// 13198090Srdivacky 14198090Srdivacky#include "ARMBaseInstrInfo.h" 15198090Srdivacky#include "ARM.h" 16234353Sdim#include "ARMBaseRegisterInfo.h" 17199481Srdivacky#include "ARMConstantPoolValue.h" 18218893Sdim#include "ARMHazardRecognizer.h" 19198090Srdivacky#include "ARMMachineFunctionInfo.h" 20226633Sdim#include "MCTargetDesc/ARMAddressingModes.h" 21249423Sdim#include "llvm/ADT/STLExtras.h" 22198090Srdivacky#include "llvm/CodeGen/LiveVariables.h" 23199481Srdivacky#include "llvm/CodeGen/MachineConstantPool.h" 24198090Srdivacky#include "llvm/CodeGen/MachineFrameInfo.h" 25198090Srdivacky#include "llvm/CodeGen/MachineInstrBuilder.h" 26198090Srdivacky#include "llvm/CodeGen/MachineJumpTableInfo.h" 27198090Srdivacky#include "llvm/CodeGen/MachineMemOperand.h" 28208599Srdivacky#include "llvm/CodeGen/MachineRegisterInfo.h" 29226633Sdim#include "llvm/CodeGen/SelectionDAGNodes.h" 30249423Sdim#include "llvm/IR/Constants.h" 31249423Sdim#include "llvm/IR/Function.h" 32249423Sdim#include "llvm/IR/GlobalValue.h" 33198090Srdivacky#include "llvm/MC/MCAsmInfo.h" 34224145Sdim#include "llvm/Support/BranchProbability.h" 35198090Srdivacky#include "llvm/Support/CommandLine.h" 36198892Srdivacky#include "llvm/Support/Debug.h" 37198090Srdivacky#include "llvm/Support/ErrorHandling.h" 38224145Sdim 39224145Sdim#define 
GET_INSTRINFO_CTOR 40224145Sdim#include "ARMGenInstrInfo.inc" 41224145Sdim 42198090Srdivackyusing namespace llvm; 43198090Srdivacky 44198090Srdivackystatic cl::opt<bool> 45198090SrdivackyEnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 46198090Srdivacky cl::desc("Enable ARM 2-addr to 3-addr conv")); 47198090Srdivacky 48226633Sdimstatic cl::opt<bool> 49234353SdimWidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), 50226633Sdim cl::desc("Widen ARM vmovs to vmovd when possible")); 51226633Sdim 52243830Sdimstatic cl::opt<unsigned> 53243830SdimSwiftPartialUpdateClearance("swift-partial-update-clearance", 54243830Sdim cl::Hidden, cl::init(12), 55243830Sdim cl::desc("Clearance before partial register updates")); 56243830Sdim 57218893Sdim/// ARM_MLxEntry - Record information about MLA / MLS instructions. 58218893Sdimstruct ARM_MLxEntry { 59239462Sdim uint16_t MLxOpc; // MLA / MLS opcode 60239462Sdim uint16_t MulOpc; // Expanded multiplication opcode 61239462Sdim uint16_t AddSubOpc; // Expanded add / sub opcode 62218893Sdim bool NegAcc; // True if the acc is negated before the add / sub. 63218893Sdim bool HasLane; // True if instruction has an extra "lane" operand. 
64218893Sdim}; 65218893Sdim 66218893Sdimstatic const ARM_MLxEntry ARM_MLxTable[] = { 67218893Sdim // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 68218893Sdim // fp scalar ops 69218893Sdim { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 70218893Sdim { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 71218893Sdim { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 72218893Sdim { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 73218893Sdim { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 74218893Sdim { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 75218893Sdim { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 76218893Sdim { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 77218893Sdim 78218893Sdim // fp SIMD ops 79218893Sdim { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 80218893Sdim { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 81218893Sdim { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 82218893Sdim { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 83218893Sdim { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 84218893Sdim { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 85218893Sdim { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 86218893Sdim { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 87218893Sdim}; 88218893Sdim 89198892SrdivackyARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 90224145Sdim : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 91198892Srdivacky Subtarget(STI) { 92218893Sdim for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 93218893Sdim if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 94218893Sdim assert(false && "Duplicated entries?"); 95218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 96218893Sdim MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 97218893Sdim } 98198090Srdivacky} 99198090Srdivacky 100218893Sdim// Use a ScoreboardHazardRecognizer for 
prepass ARM scheduling. TargetInstrImpl 101218893Sdim// currently defaults to no prepass hazard recognizer. 102218893SdimScheduleHazardRecognizer *ARMBaseInstrInfo:: 103218893SdimCreateTargetHazardRecognizer(const TargetMachine *TM, 104218893Sdim const ScheduleDAG *DAG) const { 105218893Sdim if (usePreRAHazardRecognizer()) { 106218893Sdim const InstrItineraryData *II = TM->getInstrItineraryData(); 107218893Sdim return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 108218893Sdim } 109249423Sdim return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); 110218893Sdim} 111218893Sdim 112218893SdimScheduleHazardRecognizer *ARMBaseInstrInfo:: 113218893SdimCreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 114218893Sdim const ScheduleDAG *DAG) const { 115218893Sdim if (Subtarget.isThumb2() || Subtarget.hasVFP2()) 116218893Sdim return (ScheduleHazardRecognizer *) 117218893Sdim new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); 118249423Sdim return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); 119218893Sdim} 120218893Sdim 121198090SrdivackyMachineInstr * 122198090SrdivackyARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 123198090Srdivacky MachineBasicBlock::iterator &MBBI, 124198090Srdivacky LiveVariables *LV) const { 125198090Srdivacky // FIXME: Thumb2 support. 
126198090Srdivacky 127198090Srdivacky if (!EnableARM3Addr) 128198090Srdivacky return NULL; 129198090Srdivacky 130198090Srdivacky MachineInstr *MI = MBBI; 131198090Srdivacky MachineFunction &MF = *MI->getParent()->getParent(); 132210299Sed uint64_t TSFlags = MI->getDesc().TSFlags; 133198090Srdivacky bool isPre = false; 134198090Srdivacky switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 135198090Srdivacky default: return NULL; 136198090Srdivacky case ARMII::IndexModePre: 137198090Srdivacky isPre = true; 138198090Srdivacky break; 139198090Srdivacky case ARMII::IndexModePost: 140198090Srdivacky break; 141198090Srdivacky } 142198090Srdivacky 143198090Srdivacky // Try splitting an indexed load/store to an un-indexed one plus an add/sub 144198090Srdivacky // operation. 145198090Srdivacky unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); 146198090Srdivacky if (MemOpc == 0) 147198090Srdivacky return NULL; 148198090Srdivacky 149198090Srdivacky MachineInstr *UpdateMI = NULL; 150198090Srdivacky MachineInstr *MemMI = NULL; 151198090Srdivacky unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 152224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 153224145Sdim unsigned NumOps = MCID.getNumOperands(); 154234353Sdim bool isLoad = !MI->mayStore(); 155198090Srdivacky const MachineOperand &WB = isLoad ? 
MI->getOperand(1) : MI->getOperand(0); 156198090Srdivacky const MachineOperand &Base = MI->getOperand(2); 157198090Srdivacky const MachineOperand &Offset = MI->getOperand(NumOps-3); 158198090Srdivacky unsigned WBReg = WB.getReg(); 159198090Srdivacky unsigned BaseReg = Base.getReg(); 160198090Srdivacky unsigned OffReg = Offset.getReg(); 161198090Srdivacky unsigned OffImm = MI->getOperand(NumOps-2).getImm(); 162198090Srdivacky ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); 163198090Srdivacky switch (AddrMode) { 164234353Sdim default: llvm_unreachable("Unknown indexed op!"); 165198090Srdivacky case ARMII::AddrMode2: { 166198090Srdivacky bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 167198090Srdivacky unsigned Amt = ARM_AM::getAM2Offset(OffImm); 168198090Srdivacky if (OffReg == 0) { 169198090Srdivacky if (ARM_AM::getSOImmVal(Amt) == -1) 170198090Srdivacky // Can't encode it in a so_imm operand. This transformation will 171198090Srdivacky // add more than 1 instruction. Abandon! 172198090Srdivacky return NULL; 173198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 174198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 175198090Srdivacky .addReg(BaseReg).addImm(Amt) 176198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 177198090Srdivacky } else if (Amt != 0) { 178198090Srdivacky ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 179198090Srdivacky unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 180198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 181226633Sdim get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 182198090Srdivacky .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) 183198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 184198090Srdivacky } else 185198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 186198090Srdivacky get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) 187198090Srdivacky .addReg(BaseReg).addReg(OffReg) 188198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 189198090Srdivacky break; 190198090Srdivacky } 191198090Srdivacky case ARMII::AddrMode3 : { 192198090Srdivacky bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 193198090Srdivacky unsigned Amt = ARM_AM::getAM3Offset(OffImm); 194198090Srdivacky if (OffReg == 0) 195198090Srdivacky // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 196198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 197198090Srdivacky get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 198198090Srdivacky .addReg(BaseReg).addImm(Amt) 199198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 200198090Srdivacky else 201198090Srdivacky UpdateMI = BuildMI(MF, MI->getDebugLoc(), 202198090Srdivacky get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 203198090Srdivacky .addReg(BaseReg).addReg(OffReg) 204198090Srdivacky .addImm(Pred).addReg(0).addReg(0); 205198090Srdivacky break; 206198090Srdivacky } 207198090Srdivacky } 208198090Srdivacky 209198090Srdivacky std::vector<MachineInstr*> NewMIs; 210198090Srdivacky if (isPre) { 211198090Srdivacky if (isLoad) 212198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 213198090Srdivacky get(MemOpc), MI->getOperand(0).getReg()) 214218893Sdim .addReg(WBReg).addImm(0).addImm(Pred); 215198090Srdivacky else 216198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 217198090Srdivacky get(MemOpc)).addReg(MI->getOperand(1).getReg()) 218198090Srdivacky .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); 219198090Srdivacky NewMIs.push_back(MemMI); 220198090Srdivacky NewMIs.push_back(UpdateMI); 221198090Srdivacky } else { 222198090Srdivacky if (isLoad) 223198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 224198090Srdivacky get(MemOpc), MI->getOperand(0).getReg()) 225218893Sdim .addReg(BaseReg).addImm(0).addImm(Pred); 226198090Srdivacky else 227198090Srdivacky MemMI = BuildMI(MF, MI->getDebugLoc(), 228198090Srdivacky 
get(MemOpc)).addReg(MI->getOperand(1).getReg()) 229198090Srdivacky .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); 230198090Srdivacky if (WB.isDead()) 231198090Srdivacky UpdateMI->getOperand(0).setIsDead(); 232198090Srdivacky NewMIs.push_back(UpdateMI); 233198090Srdivacky NewMIs.push_back(MemMI); 234198090Srdivacky } 235198090Srdivacky 236198090Srdivacky // Transfer LiveVariables states, kill / dead info. 237198090Srdivacky if (LV) { 238198090Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 239198090Srdivacky MachineOperand &MO = MI->getOperand(i); 240218893Sdim if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 241198090Srdivacky unsigned Reg = MO.getReg(); 242198090Srdivacky 243198090Srdivacky LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 244198090Srdivacky if (MO.isDef()) { 245198090Srdivacky MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 246198090Srdivacky if (MO.isDead()) 247198090Srdivacky LV->addVirtualRegisterDead(Reg, NewMI); 248198090Srdivacky } 249198090Srdivacky if (MO.isUse() && MO.isKill()) { 250198090Srdivacky for (unsigned j = 0; j < 2; ++j) { 251198090Srdivacky // Look at the two new MI's in reverse order. 252198090Srdivacky MachineInstr *NewMI = NewMIs[j]; 253198090Srdivacky if (!NewMI->readsRegister(Reg)) 254198090Srdivacky continue; 255198090Srdivacky LV->addVirtualRegisterKilled(Reg, NewMI); 256198090Srdivacky if (VI.removeKill(MI)) 257198090Srdivacky VI.Kills.push_back(NewMI); 258198090Srdivacky break; 259198090Srdivacky } 260198090Srdivacky } 261198090Srdivacky } 262198090Srdivacky } 263198090Srdivacky } 264198090Srdivacky 265198090Srdivacky MFI->insert(MBBI, NewMIs[1]); 266198090Srdivacky MFI->insert(MBBI, NewMIs[0]); 267198090Srdivacky return NewMIs[0]; 268198090Srdivacky} 269198090Srdivacky 270198090Srdivacky// Branch analysis. 
271198090Srdivackybool 272198090SrdivackyARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, 273198090Srdivacky MachineBasicBlock *&FBB, 274198090Srdivacky SmallVectorImpl<MachineOperand> &Cond, 275198090Srdivacky bool AllowModify) const { 276198090Srdivacky // If the block has no terminators, it just falls into the block after it. 277198090Srdivacky MachineBasicBlock::iterator I = MBB.end(); 278206083Srdivacky if (I == MBB.begin()) 279198090Srdivacky return false; 280206083Srdivacky --I; 281206083Srdivacky while (I->isDebugValue()) { 282206083Srdivacky if (I == MBB.begin()) 283206083Srdivacky return false; 284206083Srdivacky --I; 285206083Srdivacky } 286198090Srdivacky 287198090Srdivacky // Get the last instruction in the block. 288198090Srdivacky MachineInstr *LastInst = I; 289251662Sdim unsigned LastOpc = LastInst->getOpcode(); 290198090Srdivacky 291251662Sdim // Check if it's an indirect branch first, this should return 'unanalyzable' 292251662Sdim // even if it's predicated. 293251662Sdim if (isIndirectBranchOpcode(LastOpc)) 294251662Sdim return true; 295251662Sdim 296251662Sdim if (!isUnpredicatedTerminator(I)) 297251662Sdim return false; 298251662Sdim 299198090Srdivacky // If there is only one terminator instruction, process it. 300198090Srdivacky if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 301198090Srdivacky if (isUncondBranchOpcode(LastOpc)) { 302198090Srdivacky TBB = LastInst->getOperand(0).getMBB(); 303198090Srdivacky return false; 304198090Srdivacky } 305198090Srdivacky if (isCondBranchOpcode(LastOpc)) { 306198090Srdivacky // Block ends with fall-through condbranch. 307198090Srdivacky TBB = LastInst->getOperand(0).getMBB(); 308198090Srdivacky Cond.push_back(LastInst->getOperand(1)); 309198090Srdivacky Cond.push_back(LastInst->getOperand(2)); 310198090Srdivacky return false; 311198090Srdivacky } 312198090Srdivacky return true; // Can't handle indirect branch. 
313198090Srdivacky } 314198090Srdivacky 315198090Srdivacky // Get the instruction before it if it is a terminator. 316198090Srdivacky MachineInstr *SecondLastInst = I; 317218893Sdim unsigned SecondLastOpc = SecondLastInst->getOpcode(); 318198090Srdivacky 319218893Sdim // If AllowModify is true and the block ends with two or more unconditional 320218893Sdim // branches, delete all but the first unconditional branch. 321218893Sdim if (AllowModify && isUncondBranchOpcode(LastOpc)) { 322218893Sdim while (isUncondBranchOpcode(SecondLastOpc)) { 323218893Sdim LastInst->eraseFromParent(); 324218893Sdim LastInst = SecondLastInst; 325218893Sdim LastOpc = LastInst->getOpcode(); 326218893Sdim if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 327218893Sdim // Return now the only terminator is an unconditional branch. 328218893Sdim TBB = LastInst->getOperand(0).getMBB(); 329218893Sdim return false; 330218893Sdim } else { 331218893Sdim SecondLastInst = I; 332218893Sdim SecondLastOpc = SecondLastInst->getOpcode(); 333218893Sdim } 334218893Sdim } 335218893Sdim } 336218893Sdim 337198090Srdivacky // If there are three terminators, we don't know what sort of block this is. 338198090Srdivacky if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) 339198090Srdivacky return true; 340198090Srdivacky 341198090Srdivacky // If the block ends with a B and a Bcc, handle it. 342198090Srdivacky if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 343198090Srdivacky TBB = SecondLastInst->getOperand(0).getMBB(); 344198090Srdivacky Cond.push_back(SecondLastInst->getOperand(1)); 345198090Srdivacky Cond.push_back(SecondLastInst->getOperand(2)); 346198090Srdivacky FBB = LastInst->getOperand(0).getMBB(); 347198090Srdivacky return false; 348198090Srdivacky } 349198090Srdivacky 350198090Srdivacky // If the block ends with two unconditional branches, handle it. The second 351198090Srdivacky // one is not executed, so remove it. 
352198090Srdivacky if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 353198090Srdivacky TBB = SecondLastInst->getOperand(0).getMBB(); 354198090Srdivacky I = LastInst; 355198090Srdivacky if (AllowModify) 356198090Srdivacky I->eraseFromParent(); 357198090Srdivacky return false; 358198090Srdivacky } 359198090Srdivacky 360198090Srdivacky // ...likewise if it ends with a branch table followed by an unconditional 361198090Srdivacky // branch. The branch folder can create these, and we must get rid of them for 362198090Srdivacky // correctness of Thumb constant islands. 363198892Srdivacky if ((isJumpTableBranchOpcode(SecondLastOpc) || 364198892Srdivacky isIndirectBranchOpcode(SecondLastOpc)) && 365198090Srdivacky isUncondBranchOpcode(LastOpc)) { 366198090Srdivacky I = LastInst; 367198090Srdivacky if (AllowModify) 368198090Srdivacky I->eraseFromParent(); 369198090Srdivacky return true; 370198090Srdivacky } 371198090Srdivacky 372198090Srdivacky // Otherwise, can't handle this. 373198090Srdivacky return true; 374198090Srdivacky} 375198090Srdivacky 376198090Srdivacky 377198090Srdivackyunsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 378198090Srdivacky MachineBasicBlock::iterator I = MBB.end(); 379198090Srdivacky if (I == MBB.begin()) return 0; 380198090Srdivacky --I; 381206083Srdivacky while (I->isDebugValue()) { 382206083Srdivacky if (I == MBB.begin()) 383206083Srdivacky return 0; 384206083Srdivacky --I; 385206083Srdivacky } 386198090Srdivacky if (!isUncondBranchOpcode(I->getOpcode()) && 387198090Srdivacky !isCondBranchOpcode(I->getOpcode())) 388198090Srdivacky return 0; 389198090Srdivacky 390198090Srdivacky // Remove the branch. 
391198090Srdivacky I->eraseFromParent(); 392198090Srdivacky 393198090Srdivacky I = MBB.end(); 394198090Srdivacky 395198090Srdivacky if (I == MBB.begin()) return 1; 396198090Srdivacky --I; 397198090Srdivacky if (!isCondBranchOpcode(I->getOpcode())) 398198090Srdivacky return 1; 399198090Srdivacky 400198090Srdivacky // Remove the branch. 401198090Srdivacky I->eraseFromParent(); 402198090Srdivacky return 2; 403198090Srdivacky} 404198090Srdivacky 405198090Srdivackyunsigned 406198090SrdivackyARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 407198090Srdivacky MachineBasicBlock *FBB, 408210299Sed const SmallVectorImpl<MachineOperand> &Cond, 409210299Sed DebugLoc DL) const { 410198090Srdivacky ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 411198090Srdivacky int BOpc = !AFI->isThumbFunction() 412198090Srdivacky ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 413198090Srdivacky int BccOpc = !AFI->isThumbFunction() 414198090Srdivacky ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 415226633Sdim bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 416198090Srdivacky 417198090Srdivacky // Shouldn't be a fall through. 418198090Srdivacky assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 419198090Srdivacky assert((Cond.size() == 2 || Cond.size() == 0) && 420198090Srdivacky "ARM branch conditions have two components!"); 421198090Srdivacky 422198090Srdivacky if (FBB == 0) { 423226633Sdim if (Cond.empty()) { // Unconditional branch? 
424226633Sdim if (isThumb) 425226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); 426226633Sdim else 427226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 428226633Sdim } else 429210299Sed BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 430198090Srdivacky .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 431198090Srdivacky return 1; 432198090Srdivacky } 433198090Srdivacky 434198090Srdivacky // Two-way conditional branch. 435210299Sed BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 436198090Srdivacky .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 437226633Sdim if (isThumb) 438226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); 439226633Sdim else 440226633Sdim BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 441198090Srdivacky return 2; 442198090Srdivacky} 443198090Srdivacky 444198090Srdivackybool ARMBaseInstrInfo:: 445198090SrdivackyReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 446198090Srdivacky ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 447198090Srdivacky Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 448198090Srdivacky return false; 449198090Srdivacky} 450198090Srdivacky 451234353Sdimbool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { 452234353Sdim if (MI->isBundle()) { 453234353Sdim MachineBasicBlock::const_instr_iterator I = MI; 454234353Sdim MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 455234353Sdim while (++I != E && I->isInsideBundle()) { 456234353Sdim int PIdx = I->findFirstPredOperandIdx(); 457234353Sdim if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) 458234353Sdim return true; 459234353Sdim } 460234353Sdim return false; 461234353Sdim } 462234353Sdim 463234353Sdim int PIdx = MI->findFirstPredOperandIdx(); 464234353Sdim return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; 465234353Sdim} 466234353Sdim 467198090Srdivackybool ARMBaseInstrInfo:: 468198090SrdivackyPredicateInstruction(MachineInstr 
*MI, 469198090Srdivacky const SmallVectorImpl<MachineOperand> &Pred) const { 470198090Srdivacky unsigned Opc = MI->getOpcode(); 471198090Srdivacky if (isUncondBranchOpcode(Opc)) { 472198090Srdivacky MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); 473249423Sdim MachineInstrBuilder(*MI->getParent()->getParent(), MI) 474249423Sdim .addImm(Pred[0].getImm()) 475249423Sdim .addReg(Pred[1].getReg()); 476198090Srdivacky return true; 477198090Srdivacky } 478198090Srdivacky 479198090Srdivacky int PIdx = MI->findFirstPredOperandIdx(); 480198090Srdivacky if (PIdx != -1) { 481198090Srdivacky MachineOperand &PMO = MI->getOperand(PIdx); 482198090Srdivacky PMO.setImm(Pred[0].getImm()); 483198090Srdivacky MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); 484198090Srdivacky return true; 485198090Srdivacky } 486198090Srdivacky return false; 487198090Srdivacky} 488198090Srdivacky 489198090Srdivackybool ARMBaseInstrInfo:: 490198090SrdivackySubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 491198090Srdivacky const SmallVectorImpl<MachineOperand> &Pred2) const { 492198090Srdivacky if (Pred1.size() > 2 || Pred2.size() > 2) 493198090Srdivacky return false; 494198090Srdivacky 495198090Srdivacky ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 496198090Srdivacky ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 497198090Srdivacky if (CC1 == CC2) 498198090Srdivacky return true; 499198090Srdivacky 500198090Srdivacky switch (CC1) { 501198090Srdivacky default: 502198090Srdivacky return false; 503198090Srdivacky case ARMCC::AL: 504198090Srdivacky return true; 505198090Srdivacky case ARMCC::HS: 506198090Srdivacky return CC2 == ARMCC::HI; 507198090Srdivacky case ARMCC::LS: 508198090Srdivacky return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 509198090Srdivacky case ARMCC::GE: 510198090Srdivacky return CC2 == ARMCC::GT; 511198090Srdivacky case ARMCC::LE: 512198090Srdivacky return CC2 == ARMCC::LT; 513198090Srdivacky } 514198090Srdivacky} 515198090Srdivacky 
516198090Srdivackybool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, 517198090Srdivacky std::vector<MachineOperand> &Pred) const { 518198090Srdivacky bool Found = false; 519198090Srdivacky for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 520198090Srdivacky const MachineOperand &MO = MI->getOperand(i); 521234353Sdim if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || 522234353Sdim (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { 523198090Srdivacky Pred.push_back(MO); 524198090Srdivacky Found = true; 525198090Srdivacky } 526198090Srdivacky } 527198090Srdivacky 528198090Srdivacky return Found; 529198090Srdivacky} 530198090Srdivacky 531199989Srdivacky/// isPredicable - Return true if the specified instruction can be predicated. 532199989Srdivacky/// By default, this returns true for every instruction with a 533199989Srdivacky/// PredicateOperand. 534199989Srdivackybool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { 535234353Sdim if (!MI->isPredicable()) 536199989Srdivacky return false; 537198090Srdivacky 538234353Sdim if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { 539199989Srdivacky ARMFunctionInfo *AFI = 540199989Srdivacky MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); 541199989Srdivacky return AFI->isThumb2Function(); 542199989Srdivacky } 543199989Srdivacky return true; 544199989Srdivacky} 545199989Srdivacky 546200581Srdivacky/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 
547218893SdimLLVM_ATTRIBUTE_NOINLINE 548198090Srdivackystatic unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 549200581Srdivacky unsigned JTI); 550198090Srdivackystatic unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 551198090Srdivacky unsigned JTI) { 552200581Srdivacky assert(JTI < JT.size()); 553198090Srdivacky return JT[JTI].MBBs.size(); 554198090Srdivacky} 555198090Srdivacky 556198090Srdivacky/// GetInstSize - Return the size of the specified MachineInstr. 557198090Srdivacky/// 558198090Srdivackyunsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 559198090Srdivacky const MachineBasicBlock &MBB = *MI->getParent(); 560198090Srdivacky const MachineFunction *MF = MBB.getParent(); 561198090Srdivacky const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 562198090Srdivacky 563224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 564224145Sdim if (MCID.getSize()) 565224145Sdim return MCID.getSize(); 566198090Srdivacky 567234353Sdim // If this machine instr is an inline asm, measure it. 
568234353Sdim if (MI->getOpcode() == ARM::INLINEASM) 569234353Sdim return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); 570234353Sdim if (MI->isLabel()) 571234353Sdim return 0; 572224145Sdim unsigned Opc = MI->getOpcode(); 573234353Sdim switch (Opc) { 574234353Sdim case TargetOpcode::IMPLICIT_DEF: 575234353Sdim case TargetOpcode::KILL: 576234353Sdim case TargetOpcode::PROLOG_LABEL: 577234353Sdim case TargetOpcode::EH_LABEL: 578234353Sdim case TargetOpcode::DBG_VALUE: 579234353Sdim return 0; 580234353Sdim case TargetOpcode::BUNDLE: 581234353Sdim return getInstBundleLength(MI); 582234353Sdim case ARM::MOVi16_ga_pcrel: 583234353Sdim case ARM::MOVTi16_ga_pcrel: 584234353Sdim case ARM::t2MOVi16_ga_pcrel: 585234353Sdim case ARM::t2MOVTi16_ga_pcrel: 586234353Sdim return 4; 587234353Sdim case ARM::MOVi32imm: 588234353Sdim case ARM::t2MOVi32imm: 589234353Sdim return 8; 590234353Sdim case ARM::CONSTPOOL_ENTRY: 591234353Sdim // If this machine instr is a constant pool entry, its size is recorded as 592234353Sdim // operand #2. 593234353Sdim return MI->getOperand(2).getImm(); 594234353Sdim case ARM::Int_eh_sjlj_longjmp: 595234353Sdim return 16; 596234353Sdim case ARM::tInt_eh_sjlj_longjmp: 597234353Sdim return 10; 598234353Sdim case ARM::Int_eh_sjlj_setjmp: 599234353Sdim case ARM::Int_eh_sjlj_setjmp_nofp: 600234353Sdim return 20; 601234353Sdim case ARM::tInt_eh_sjlj_setjmp: 602234353Sdim case ARM::t2Int_eh_sjlj_setjmp: 603234353Sdim case ARM::t2Int_eh_sjlj_setjmp_nofp: 604234353Sdim return 12; 605234353Sdim case ARM::BR_JTr: 606234353Sdim case ARM::BR_JTm: 607234353Sdim case ARM::BR_JTadd: 608234353Sdim case ARM::tBR_JTr: 609234353Sdim case ARM::t2BR_JT: 610234353Sdim case ARM::t2TBB_JT: 611234353Sdim case ARM::t2TBH_JT: { 612234353Sdim // These are jumptable branches, i.e. a branch followed by an inlined 613234353Sdim // jumptable. The size is 4 + 4 * number of entries. For TBB, each 614234353Sdim // entry is one byte; TBH two byte each. 
615234353Sdim unsigned EntrySize = (Opc == ARM::t2TBB_JT) 616234353Sdim ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); 617234353Sdim unsigned NumOps = MCID.getNumOperands(); 618234353Sdim MachineOperand JTOP = 619234353Sdim MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); 620234353Sdim unsigned JTI = JTOP.getIndex(); 621234353Sdim const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); 622234353Sdim assert(MJTI != 0); 623234353Sdim const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); 624234353Sdim assert(JTI < JT.size()); 625234353Sdim // Thumb instructions are 2 byte aligned, but JT entries are 4 byte 626234353Sdim // 4 aligned. The assembler / linker may add 2 byte padding just before 627234353Sdim // the JT entries. The size does not include this padding; the 628234353Sdim // constant islands pass does separate bookkeeping for it. 629234353Sdim // FIXME: If we know the size of the function is less than (1 << 16) *2 630234353Sdim // bytes, we can use 16-bit entries instead. Then there won't be an 631234353Sdim // alignment issue. 632234353Sdim unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; 633234353Sdim unsigned NumEntries = getNumJTEntries(JT, JTI); 634234353Sdim if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) 635234353Sdim // Make sure the instruction that follows TBB is 2-byte aligned. 636234353Sdim // FIXME: Constant island pass should insert an "ALIGN" instruction 637234353Sdim // instead. 638234353Sdim ++NumEntries; 639234353Sdim return NumEntries * EntrySize + InstSize; 640234353Sdim } 641234353Sdim default: 642234353Sdim // Otherwise, pseudo-instruction sizes are zero. 
643234353Sdim return 0; 644234353Sdim } 645198090Srdivacky} 646198090Srdivacky 647234353Sdimunsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { 648234353Sdim unsigned Size = 0; 649234353Sdim MachineBasicBlock::const_instr_iterator I = MI; 650234353Sdim MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); 651234353Sdim while (++I != E && I->isInsideBundle()) { 652234353Sdim assert(!I->isBundle() && "No nested bundle!"); 653234353Sdim Size += GetInstSizeInBytes(&*I); 654234353Sdim } 655234353Sdim return Size; 656234353Sdim} 657234353Sdim 658210299Sedvoid ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 659210299Sed MachineBasicBlock::iterator I, DebugLoc DL, 660210299Sed unsigned DestReg, unsigned SrcReg, 661210299Sed bool KillSrc) const { 662210299Sed bool GPRDest = ARM::GPRRegClass.contains(DestReg); 663210299Sed bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); 664204642Srdivacky 665210299Sed if (GPRDest && GPRSrc) { 666210299Sed AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) 667210299Sed .addReg(SrcReg, getKillRegState(KillSrc)))); 668210299Sed return; 669210299Sed } 670198892Srdivacky 671210299Sed bool SPRDest = ARM::SPRRegClass.contains(DestReg); 672210299Sed bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); 673205407Srdivacky 674226633Sdim unsigned Opc = 0; 675210299Sed if (SPRDest && SPRSrc) 676210299Sed Opc = ARM::VMOVS; 677210299Sed else if (GPRDest && SPRSrc) 678210299Sed Opc = ARM::VMOVRS; 679210299Sed else if (SPRDest && GPRSrc) 680210299Sed Opc = ARM::VMOVSR; 681210299Sed else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) 682210299Sed Opc = ARM::VMOVD; 683210299Sed else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) 684224145Sdim Opc = ARM::VORRq; 685208599Srdivacky 686226633Sdim if (Opc) { 687226633Sdim MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); 688224145Sdim MIB.addReg(SrcReg, getKillRegState(KillSrc)); 689226633Sdim if (Opc == ARM::VORRq) 
690226633Sdim MIB.addReg(SrcReg, getKillRegState(KillSrc)); 691210299Sed AddDefaultPred(MIB); 692226633Sdim return; 693226633Sdim } 694226633Sdim 695234353Sdim // Handle register classes that require multiple instructions. 696234353Sdim unsigned BeginIdx = 0; 697234353Sdim unsigned SubRegs = 0; 698243830Sdim int Spacing = 1; 699234353Sdim 700234353Sdim // Use VORRq when possible. 701234353Sdim if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) 702234353Sdim Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; 703234353Sdim else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) 704234353Sdim Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; 705234353Sdim // Fall back to VMOVD. 706234353Sdim else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) 707234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; 708234353Sdim else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) 709234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; 710234353Sdim else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) 711234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; 712243830Sdim else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) 713243830Sdim Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; 714234353Sdim 715234353Sdim else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) 716234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; 717234353Sdim else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) 718234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; 719234353Sdim else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) 720234353Sdim Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; 721234353Sdim 722243830Sdim assert(Opc && "Impossible reg-to-reg copy"); 723243830Sdim 724243830Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 725243830Sdim MachineInstrBuilder Mov; 726243830Sdim 727243830Sdim // Copy register tuples backward when the first Dest 
reg overlaps with SrcReg. 728243830Sdim if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { 729243830Sdim BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); 730243830Sdim Spacing = -Spacing; 731226633Sdim } 732243830Sdim#ifndef NDEBUG 733243830Sdim SmallSet<unsigned, 4> DstRegs; 734243830Sdim#endif 735243830Sdim for (unsigned i = 0; i != SubRegs; ++i) { 736243830Sdim unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); 737243830Sdim unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); 738243830Sdim assert(Dst && Src && "Bad sub-register"); 739243830Sdim#ifndef NDEBUG 740243830Sdim assert(!DstRegs.count(Src) && "destructive vector copy"); 741243830Sdim DstRegs.insert(Dst); 742243830Sdim#endif 743243830Sdim Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) 744243830Sdim .addReg(Src); 745243830Sdim // VORR takes two source operands. 746243830Sdim if (Opc == ARM::VORRq) 747243830Sdim Mov.addReg(Src); 748243830Sdim Mov = AddDefaultPred(Mov); 749243830Sdim } 750243830Sdim // Add implicit super-register defs and kills to the last instruction. 
751243830Sdim Mov->addRegisterDefined(DestReg, TRI); 752243830Sdim if (KillSrc) 753243830Sdim Mov->addRegisterKilled(SrcReg, TRI); 754198090Srdivacky} 755198090Srdivacky 756251662Sdimconst MachineInstrBuilder & 757251662SdimARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, 758251662Sdim unsigned SubIdx, unsigned State, 759251662Sdim const TargetRegisterInfo *TRI) const { 760208599Srdivacky if (!SubIdx) 761208599Srdivacky return MIB.addReg(Reg, State); 762208599Srdivacky 763208599Srdivacky if (TargetRegisterInfo::isPhysicalRegister(Reg)) 764208599Srdivacky return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 765208599Srdivacky return MIB.addReg(Reg, State, SubIdx); 766208599Srdivacky} 767208599Srdivacky 768198090Srdivackyvoid ARMBaseInstrInfo:: 769198090SrdivackystoreRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 770198090Srdivacky unsigned SrcReg, bool isKill, int FI, 771208599Srdivacky const TargetRegisterClass *RC, 772208599Srdivacky const TargetRegisterInfo *TRI) const { 773206124Srdivacky DebugLoc DL; 774198090Srdivacky if (I != MBB.end()) DL = I->getDebugLoc(); 775198090Srdivacky MachineFunction &MF = *MBB.getParent(); 776198090Srdivacky MachineFrameInfo &MFI = *MF.getFrameInfo(); 777199481Srdivacky unsigned Align = MFI.getObjectAlignment(FI); 778198090Srdivacky 779198090Srdivacky MachineMemOperand *MMO = 780234353Sdim MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 781218893Sdim MachineMemOperand::MOStore, 782198090Srdivacky MFI.getObjectSize(FI), 783199481Srdivacky Align); 784198090Srdivacky 785226633Sdim switch (RC->getSize()) { 786226633Sdim case 4: 787226633Sdim if (ARM::GPRRegClass.hasSubClassEq(RC)) { 788226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) 789198090Srdivacky .addReg(SrcReg, getKillRegState(isKill)) 790218893Sdim .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 791226633Sdim } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 792226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, 
get(ARM::VSTRS)) 793208599Srdivacky .addReg(SrcReg, getKillRegState(isKill)) 794208599Srdivacky .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 795226633Sdim } else 796226633Sdim llvm_unreachable("Unknown reg class!"); 797226633Sdim break; 798226633Sdim case 8: 799226633Sdim if (ARM::DPRRegClass.hasSubClassEq(RC)) { 800226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) 801198090Srdivacky .addReg(SrcReg, getKillRegState(isKill)) 802198090Srdivacky .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 803243830Sdim } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { 804251662Sdim if (Subtarget.hasV5TEOps()) { 805251662Sdim MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); 806251662Sdim AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 807251662Sdim AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 808251662Sdim MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); 809251662Sdim 810251662Sdim AddDefaultPred(MIB); 811251662Sdim } else { 812251662Sdim // Fallback to STM instruction, which has existed since the dawn of 813251662Sdim // time. 814251662Sdim MachineInstrBuilder MIB = 815251662Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) 816251662Sdim .addFrameIndex(FI).addMemOperand(MMO)); 817251662Sdim AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); 818251662Sdim AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); 819251662Sdim } 820226633Sdim } else 821226633Sdim llvm_unreachable("Unknown reg class!"); 822226633Sdim break; 823226633Sdim case 16: 824234353Sdim if (ARM::DPairRegClass.hasSubClassEq(RC)) { 825234353Sdim // Use aligned spills if the stack can be realigned. 
826234353Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 827234353Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) 828210299Sed .addFrameIndex(FI).addImm(16) 829208599Srdivacky .addReg(SrcReg, getKillRegState(isKill)) 830208599Srdivacky .addMemOperand(MMO)); 831226633Sdim } else { 832226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) 833208599Srdivacky .addReg(SrcReg, getKillRegState(isKill)) 834206083Srdivacky .addFrameIndex(FI) 835206083Srdivacky .addMemOperand(MMO)); 836226633Sdim } 837226633Sdim } else 838226633Sdim llvm_unreachable("Unknown reg class!"); 839226633Sdim break; 840239462Sdim case 24: 841239462Sdim if (ARM::DTripleRegClass.hasSubClassEq(RC)) { 842239462Sdim // Use aligned spills if the stack can be realigned. 843239462Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 844239462Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) 845239462Sdim .addFrameIndex(FI).addImm(16) 846239462Sdim .addReg(SrcReg, getKillRegState(isKill)) 847239462Sdim .addMemOperand(MMO)); 848239462Sdim } else { 849239462Sdim MachineInstrBuilder MIB = 850239462Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 851239462Sdim .addFrameIndex(FI)) 852239462Sdim .addMemOperand(MMO); 853239462Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 854239462Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 855239462Sdim AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 856239462Sdim } 857239462Sdim } else 858239462Sdim llvm_unreachable("Unknown reg class!"); 859239462Sdim break; 860226633Sdim case 32: 861239462Sdim if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { 862226633Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 863226633Sdim // FIXME: It's possible to only store part of the QQ register if the 864226633Sdim // spilled def has a sub-register index. 
865226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) 866218893Sdim .addFrameIndex(FI).addImm(16) 867218893Sdim .addReg(SrcReg, getKillRegState(isKill)) 868218893Sdim .addMemOperand(MMO)); 869226633Sdim } else { 870226633Sdim MachineInstrBuilder MIB = 871226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 872218893Sdim .addFrameIndex(FI)) 873226633Sdim .addMemOperand(MMO); 874226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 875226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 876226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 877226633Sdim AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 878226633Sdim } 879226633Sdim } else 880226633Sdim llvm_unreachable("Unknown reg class!"); 881226633Sdim break; 882226633Sdim case 64: 883226633Sdim if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 884226633Sdim MachineInstrBuilder MIB = 885226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 886226633Sdim .addFrameIndex(FI)) 887226633Sdim .addMemOperand(MMO); 888226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 889226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 890226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 891226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 892226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); 893226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); 894226633Sdim MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); 895226633Sdim AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); 896226633Sdim } else 897226633Sdim llvm_unreachable("Unknown reg class!"); 898226633Sdim break; 899226633Sdim default: 900226633Sdim llvm_unreachable("Unknown reg class!"); 901198090Srdivacky } 902198090Srdivacky} 903198090Srdivacky 904218893Sdimunsigned 905218893SdimARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, 906218893Sdim int &FrameIndex) const { 907218893Sdim switch (MI->getOpcode()) { 908218893Sdim 
default: break; 909218893Sdim case ARM::STRrs: 910218893Sdim case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 911218893Sdim if (MI->getOperand(1).isFI() && 912218893Sdim MI->getOperand(2).isReg() && 913218893Sdim MI->getOperand(3).isImm() && 914218893Sdim MI->getOperand(2).getReg() == 0 && 915218893Sdim MI->getOperand(3).getImm() == 0) { 916218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 917218893Sdim return MI->getOperand(0).getReg(); 918218893Sdim } 919218893Sdim break; 920218893Sdim case ARM::STRi12: 921218893Sdim case ARM::t2STRi12: 922224145Sdim case ARM::tSTRspi: 923218893Sdim case ARM::VSTRD: 924218893Sdim case ARM::VSTRS: 925218893Sdim if (MI->getOperand(1).isFI() && 926218893Sdim MI->getOperand(2).isImm() && 927218893Sdim MI->getOperand(2).getImm() == 0) { 928218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 929218893Sdim return MI->getOperand(0).getReg(); 930218893Sdim } 931218893Sdim break; 932234353Sdim case ARM::VST1q64: 933239462Sdim case ARM::VST1d64TPseudo: 934239462Sdim case ARM::VST1d64QPseudo: 935218893Sdim if (MI->getOperand(0).isFI() && 936218893Sdim MI->getOperand(2).getSubReg() == 0) { 937218893Sdim FrameIndex = MI->getOperand(0).getIndex(); 938218893Sdim return MI->getOperand(2).getReg(); 939218893Sdim } 940218893Sdim break; 941218893Sdim case ARM::VSTMQIA: 942218893Sdim if (MI->getOperand(1).isFI() && 943218893Sdim MI->getOperand(0).getSubReg() == 0) { 944218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 945218893Sdim return MI->getOperand(0).getReg(); 946218893Sdim } 947218893Sdim break; 948218893Sdim } 949218893Sdim 950218893Sdim return 0; 951218893Sdim} 952218893Sdim 953226633Sdimunsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 954226633Sdim int &FrameIndex) const { 955226633Sdim const MachineMemOperand *Dummy; 956234353Sdim return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); 957226633Sdim} 958226633Sdim 959198090Srdivackyvoid ARMBaseInstrInfo:: 
960198090SrdivackyloadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 961198090Srdivacky unsigned DestReg, int FI, 962208599Srdivacky const TargetRegisterClass *RC, 963208599Srdivacky const TargetRegisterInfo *TRI) const { 964206124Srdivacky DebugLoc DL; 965198090Srdivacky if (I != MBB.end()) DL = I->getDebugLoc(); 966198090Srdivacky MachineFunction &MF = *MBB.getParent(); 967198090Srdivacky MachineFrameInfo &MFI = *MF.getFrameInfo(); 968199481Srdivacky unsigned Align = MFI.getObjectAlignment(FI); 969198090Srdivacky MachineMemOperand *MMO = 970218893Sdim MF.getMachineMemOperand( 971234353Sdim MachinePointerInfo::getFixedStack(FI), 972218893Sdim MachineMemOperand::MOLoad, 973198090Srdivacky MFI.getObjectSize(FI), 974199481Srdivacky Align); 975198090Srdivacky 976226633Sdim switch (RC->getSize()) { 977226633Sdim case 4: 978226633Sdim if (ARM::GPRRegClass.hasSubClassEq(RC)) { 979226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) 980226633Sdim .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 981204642Srdivacky 982226633Sdim } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 983226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) 984218893Sdim .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 985226633Sdim } else 986226633Sdim llvm_unreachable("Unknown reg class!"); 987210299Sed break; 988226633Sdim case 8: 989226633Sdim if (ARM::DPRRegClass.hasSubClassEq(RC)) { 990226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) 991208599Srdivacky .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 992243830Sdim } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { 993251662Sdim MachineInstrBuilder MIB; 994251662Sdim 995251662Sdim if (Subtarget.hasV5TEOps()) { 996251662Sdim MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); 997251662Sdim AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); 998251662Sdim AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); 999251662Sdim 
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); 1000251662Sdim 1001251662Sdim AddDefaultPred(MIB); 1002251662Sdim } else { 1003251662Sdim // Fallback to LDM instruction, which has existed since the dawn of 1004251662Sdim // time. 1005251662Sdim MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) 1006251662Sdim .addFrameIndex(FI).addMemOperand(MMO)); 1007251662Sdim MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); 1008251662Sdim MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); 1009251662Sdim } 1010251662Sdim 1011243830Sdim if (TargetRegisterInfo::isPhysicalRegister(DestReg)) 1012243830Sdim MIB.addReg(DestReg, RegState::ImplicitDefine); 1013226633Sdim } else 1014226633Sdim llvm_unreachable("Unknown reg class!"); 1015210299Sed break; 1016226633Sdim case 16: 1017234353Sdim if (ARM::DPairRegClass.hasSubClassEq(RC)) { 1018234353Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 1019234353Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) 1020210299Sed .addFrameIndex(FI).addImm(16) 1021199989Srdivacky .addMemOperand(MMO)); 1022226633Sdim } else { 1023226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) 1024226633Sdim .addFrameIndex(FI) 1025226633Sdim .addMemOperand(MMO)); 1026226633Sdim } 1027226633Sdim } else 1028226633Sdim llvm_unreachable("Unknown reg class!"); 1029210299Sed break; 1030239462Sdim case 24: 1031239462Sdim if (ARM::DTripleRegClass.hasSubClassEq(RC)) { 1032226633Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 1033239462Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) 1034239462Sdim .addFrameIndex(FI).addImm(16) 1035239462Sdim .addMemOperand(MMO)); 1036239462Sdim } else { 1037239462Sdim MachineInstrBuilder MIB = 1038239462Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1039239462Sdim .addFrameIndex(FI) 1040239462Sdim .addMemOperand(MMO)); 1041239462Sdim MIB = AddDReg(MIB, DestReg, 
ARM::dsub_0, RegState::DefineNoRead, TRI); 1042239462Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1043239462Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); 1044239462Sdim if (TargetRegisterInfo::isPhysicalRegister(DestReg)) 1045239462Sdim MIB.addReg(DestReg, RegState::ImplicitDefine); 1046239462Sdim } 1047239462Sdim } else 1048239462Sdim llvm_unreachable("Unknown reg class!"); 1049239462Sdim break; 1050239462Sdim case 32: 1051239462Sdim if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { 1052239462Sdim if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 1053226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) 1054218893Sdim .addFrameIndex(FI).addImm(16) 1055218893Sdim .addMemOperand(MMO)); 1056226633Sdim } else { 1057226633Sdim MachineInstrBuilder MIB = 1058218893Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1059218893Sdim .addFrameIndex(FI)) 1060226633Sdim .addMemOperand(MMO); 1061234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); 1062234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1063234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); 1064234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); 1065234353Sdim if (TargetRegisterInfo::isPhysicalRegister(DestReg)) 1066234353Sdim MIB.addReg(DestReg, RegState::ImplicitDefine); 1067226633Sdim } 1068226633Sdim } else 1069226633Sdim llvm_unreachable("Unknown reg class!"); 1070226633Sdim break; 1071226633Sdim case 64: 1072226633Sdim if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 1073226633Sdim MachineInstrBuilder MIB = 1074226633Sdim AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 1075226633Sdim .addFrameIndex(FI)) 1076226633Sdim .addMemOperand(MMO); 1077234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); 1078234353Sdim MIB = 
AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); 1079234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); 1080234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); 1081234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); 1082234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); 1083234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); 1084234353Sdim MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); 1085234353Sdim if (TargetRegisterInfo::isPhysicalRegister(DestReg)) 1086234353Sdim MIB.addReg(DestReg, RegState::ImplicitDefine); 1087226633Sdim } else 1088226633Sdim llvm_unreachable("Unknown reg class!"); 1089210299Sed break; 1090210299Sed default: 1091210299Sed llvm_unreachable("Unknown regclass!"); 1092210299Sed } 1093198090Srdivacky} 1094198090Srdivacky 1095218893Sdimunsigned 1096218893SdimARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 1097218893Sdim int &FrameIndex) const { 1098218893Sdim switch (MI->getOpcode()) { 1099218893Sdim default: break; 1100218893Sdim case ARM::LDRrs: 1101218893Sdim case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 
1102218893Sdim if (MI->getOperand(1).isFI() && 1103218893Sdim MI->getOperand(2).isReg() && 1104218893Sdim MI->getOperand(3).isImm() && 1105218893Sdim MI->getOperand(2).getReg() == 0 && 1106218893Sdim MI->getOperand(3).getImm() == 0) { 1107218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 1108218893Sdim return MI->getOperand(0).getReg(); 1109218893Sdim } 1110218893Sdim break; 1111218893Sdim case ARM::LDRi12: 1112218893Sdim case ARM::t2LDRi12: 1113224145Sdim case ARM::tLDRspi: 1114218893Sdim case ARM::VLDRD: 1115218893Sdim case ARM::VLDRS: 1116218893Sdim if (MI->getOperand(1).isFI() && 1117218893Sdim MI->getOperand(2).isImm() && 1118218893Sdim MI->getOperand(2).getImm() == 0) { 1119218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 1120218893Sdim return MI->getOperand(0).getReg(); 1121218893Sdim } 1122218893Sdim break; 1123234353Sdim case ARM::VLD1q64: 1124239462Sdim case ARM::VLD1d64TPseudo: 1125239462Sdim case ARM::VLD1d64QPseudo: 1126218893Sdim if (MI->getOperand(1).isFI() && 1127218893Sdim MI->getOperand(0).getSubReg() == 0) { 1128218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 1129218893Sdim return MI->getOperand(0).getReg(); 1130218893Sdim } 1131218893Sdim break; 1132218893Sdim case ARM::VLDMQIA: 1133218893Sdim if (MI->getOperand(1).isFI() && 1134218893Sdim MI->getOperand(0).getSubReg() == 0) { 1135218893Sdim FrameIndex = MI->getOperand(1).getIndex(); 1136218893Sdim return MI->getOperand(0).getReg(); 1137218893Sdim } 1138218893Sdim break; 1139218893Sdim } 1140218893Sdim 1141218893Sdim return 0; 1142218893Sdim} 1143218893Sdim 1144226633Sdimunsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 1145226633Sdim int &FrameIndex) const { 1146226633Sdim const MachineMemOperand *Dummy; 1147234353Sdim return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); 1148226633Sdim} 1149226633Sdim 1150226633Sdimbool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ 1151226633Sdim // This hook gets to expand 
COPY instructions before they become 1152226633Sdim // copyPhysReg() calls. Look for VMOVS instructions that can legally be 1153226633Sdim // widened to VMOVD. We prefer the VMOVD when possible because it may be 1154226633Sdim // changed into a VORR that can go down the NEON pipeline. 1155249423Sdim if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) 1156226633Sdim return false; 1157226633Sdim 1158226633Sdim // Look for a copy between even S-registers. That is where we keep floats 1159226633Sdim // when using NEON v2f32 instructions for f32 arithmetic. 1160226633Sdim unsigned DstRegS = MI->getOperand(0).getReg(); 1161226633Sdim unsigned SrcRegS = MI->getOperand(1).getReg(); 1162226633Sdim if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) 1163226633Sdim return false; 1164226633Sdim 1165226633Sdim const TargetRegisterInfo *TRI = &getRegisterInfo(); 1166226633Sdim unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, 1167226633Sdim &ARM::DPRRegClass); 1168226633Sdim unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, 1169226633Sdim &ARM::DPRRegClass); 1170226633Sdim if (!DstRegD || !SrcRegD) 1171226633Sdim return false; 1172226633Sdim 1173226633Sdim // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only 1174226633Sdim // legal if the COPY already defines the full DstRegD, and it isn't a 1175226633Sdim // sub-register insertion. 1176226633Sdim if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) 1177226633Sdim return false; 1178226633Sdim 1179226633Sdim // A dead copy shouldn't show up here, but reject it just in case. 1180226633Sdim if (MI->getOperand(0).isDead()) 1181226633Sdim return false; 1182226633Sdim 1183226633Sdim // All clear, widen the COPY. 1184226633Sdim DEBUG(dbgs() << "widening: " << *MI); 1185249423Sdim MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 1186226633Sdim 1187226633Sdim // Get rid of the old <imp-def> of DstRegD. 
Leave it if it defines a Q-reg 1188226633Sdim // or some other super-register. 1189226633Sdim int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); 1190226633Sdim if (ImpDefIdx != -1) 1191226633Sdim MI->RemoveOperand(ImpDefIdx); 1192226633Sdim 1193226633Sdim // Change the opcode and operands. 1194226633Sdim MI->setDesc(get(ARM::VMOVD)); 1195226633Sdim MI->getOperand(0).setReg(DstRegD); 1196226633Sdim MI->getOperand(1).setReg(SrcRegD); 1197249423Sdim AddDefaultPred(MIB); 1198226633Sdim 1199226633Sdim // We are now reading SrcRegD instead of SrcRegS. This may upset the 1200226633Sdim // register scavenger and machine verifier, so we need to indicate that we 1201226633Sdim // are reading an undefined value from SrcRegD, but a proper value from 1202226633Sdim // SrcRegS. 1203226633Sdim MI->getOperand(1).setIsUndef(); 1204249423Sdim MIB.addReg(SrcRegS, RegState::Implicit); 1205226633Sdim 1206226633Sdim // SrcRegD may actually contain an unrelated value in the ssub_1 1207226633Sdim // sub-register. Don't kill it. Only kill the ssub_0 sub-register. 1208226633Sdim if (MI->getOperand(1).isKill()) { 1209226633Sdim MI->getOperand(1).setIsKill(false); 1210226633Sdim MI->addRegisterKilled(SrcRegS, TRI, true); 1211226633Sdim } 1212226633Sdim 1213226633Sdim DEBUG(dbgs() << "replaced by: " << *MI); 1214226633Sdim return true; 1215226633Sdim} 1216226633Sdim 1217207618SrdivackyMachineInstr* 1218207618SrdivackyARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 1219207618Srdivacky int FrameIx, uint64_t Offset, 1220207618Srdivacky const MDNode *MDPtr, 1221207618Srdivacky DebugLoc DL) const { 1222207618Srdivacky MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) 1223207618Srdivacky .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); 1224207618Srdivacky return &*MIB; 1225207618Srdivacky} 1226207618Srdivacky 1227202375Srdivacky/// Create a copy of a const pool value. Update CPI to the new index and return 1228202375Srdivacky/// the label UID. 
1229202375Srdivackystatic unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 1230202375Srdivacky MachineConstantPool *MCP = MF.getConstantPool(); 1231202375Srdivacky ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1232202375Srdivacky 1233202375Srdivacky const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; 1234202375Srdivacky assert(MCPE.isMachineConstantPoolEntry() && 1235202375Srdivacky "Expecting a machine constantpool entry!"); 1236202375Srdivacky ARMConstantPoolValue *ACPV = 1237202375Srdivacky static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); 1238202375Srdivacky 1239218893Sdim unsigned PCLabelId = AFI->createPICLabelUId(); 1240202375Srdivacky ARMConstantPoolValue *NewCPV = 0; 1241212904Sdim // FIXME: The below assumes PIC relocation model and that the function 1242212904Sdim // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and 1243212904Sdim // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR 1244212904Sdim // instructions, so that's probably OK, but is PIC always correct when 1245212904Sdim // we get here? 
1246202375Srdivacky if (ACPV->isGlobalValue()) 1247226633Sdim NewCPV = ARMConstantPoolConstant:: 1248226633Sdim Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, 1249226633Sdim ARMCP::CPValue, 4); 1250202375Srdivacky else if (ACPV->isExtSymbol()) 1251226633Sdim NewCPV = ARMConstantPoolSymbol:: 1252226633Sdim Create(MF.getFunction()->getContext(), 1253226633Sdim cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); 1254202375Srdivacky else if (ACPV->isBlockAddress()) 1255226633Sdim NewCPV = ARMConstantPoolConstant:: 1256226633Sdim Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, 1257226633Sdim ARMCP::CPBlockAddress, 4); 1258212904Sdim else if (ACPV->isLSDA()) 1259226633Sdim NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, 1260226633Sdim ARMCP::CPLSDA, 4); 1261226633Sdim else if (ACPV->isMachineBasicBlock()) 1262226633Sdim NewCPV = ARMConstantPoolMBB:: 1263226633Sdim Create(MF.getFunction()->getContext(), 1264226633Sdim cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); 1265202375Srdivacky else 1266202375Srdivacky llvm_unreachable("Unexpected ARM constantpool value type!!"); 1267202375Srdivacky CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); 1268202375Srdivacky return PCLabelId; 1269202375Srdivacky} 1270202375Srdivacky 1271199481Srdivackyvoid ARMBaseInstrInfo:: 1272199481SrdivackyreMaterialize(MachineBasicBlock &MBB, 1273199481Srdivacky MachineBasicBlock::iterator I, 1274199481Srdivacky unsigned DestReg, unsigned SubIdx, 1275199481Srdivacky const MachineInstr *Orig, 1276210299Sed const TargetRegisterInfo &TRI) const { 1277199481Srdivacky unsigned Opcode = Orig->getOpcode(); 1278199481Srdivacky switch (Opcode) { 1279199481Srdivacky default: { 1280199481Srdivacky MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1281210299Sed MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); 1282199481Srdivacky MBB.insert(I, MI); 1283199481Srdivacky break; 
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    // Rematerializing a PIC constant-pool load requires a fresh PC label,
    // so clone the constant-pool entry rather than reusing the original.
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    // Preserve the memory operands so alias analysis still sees the load.
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

/// duplicate - Clone Orig via the generic TargetInstrInfo hook. For PIC
/// constant-pool loads the constant-pool entry (which embeds a PC label)
/// cannot be shared between two instructions, so a duplicated entry and a
/// fresh PC label id are installed.
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    // NOTE(review): setIndex(CPI) is only meaningful if duplicateCPV updates
    // CPI in place (i.e. takes it by reference) — verify against its
    // declaration. Also note the rewrite is applied to Orig, not the clone.
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

/// produceSameValue - Return true if MI0 and MI1 are guaranteed to define
/// registers holding the same value. Handles constant-pool loads and
/// global-address movs (where differing PC labels are ignored), and PICLDR
/// with a recursive check of the address definitions.
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::MOV_ga_dyn ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_dyn ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    // Operand 1 is the constant-pool index or the global address.
    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::MOV_ga_dyn ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_dyn ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    // Constant-pool loads: compare the pool entries themselves. Two distinct
    // indices may still name equal values.
    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      // Both are target-specific ARM constant-pool values; use their own
      // equality predicate (which ignores PC-label differences).
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      // Plain IR constants: pointer identity is sufficient (uniqued).
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    // One target-specific entry, one plain constant: not comparable.
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      // Different address registers can still hold the same value; look
      // through their (unique, SSA) definitions.
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address, are
      // the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    // Operand 2 onward (offset, predicate) must match exactly; the address
    // register (operand 1) was already handled above.
    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Whitelist of load opcodes whose operand layout matches the base/offset/
  // index/chain positions assumed below.
  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // NOTE(review): this second whitelist omits t2LDRDi8, unlike the first —
  // presumably intentional, but verify.
  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  // Reject loads that are far apart (heuristic distance cap).
  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
    return false;  // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

/// isSchedulingBoundary - Decide whether MI may not be moved across by the
/// scheduler. Boundaries are terminators, labels, the instruction preceding a
/// t2IT, and (non-call) writers of SP.
bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI->isTerminator() || MI->isLabel())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
1528210299Sed MachineBasicBlock::const_iterator I = MI; 1529210299Sed // Make sure to skip any dbg_value instructions 1530210299Sed while (++I != MBB->end() && I->isDebugValue()) 1531210299Sed ; 1532210299Sed if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 1533210299Sed return true; 1534210299Sed 1535210299Sed // Don't attempt to schedule around any instruction that defines 1536210299Sed // a stack-oriented pointer, as it's unlikely to be profitable. This 1537210299Sed // saves compile time, because it doesn't require every single 1538210299Sed // stack slot reference to depend on the instruction that does the 1539210299Sed // modification. 1540234353Sdim // Calls don't actually change the stack pointer, even if they have imp-defs. 1541234353Sdim // No ARM calling conventions change the stack pointer. (X86 calling 1542234353Sdim // conventions sometimes do). 1543234353Sdim if (!MI->isCall() && MI->definesRegister(ARM::SP)) 1544210299Sed return true; 1545210299Sed 1546210299Sed return false; 1547210299Sed} 1548210299Sed 1549224145Sdimbool ARMBaseInstrInfo:: 1550224145SdimisProfitableToIfCvt(MachineBasicBlock &MBB, 1551224145Sdim unsigned NumCycles, unsigned ExtraPredCycles, 1552224145Sdim const BranchProbability &Probability) const { 1553221345Sdim if (!NumCycles) 1554210299Sed return false; 1555218893Sdim 1556218893Sdim // Attempt to estimate the relative costs of predication versus branching. 
1557224145Sdim unsigned UnpredCost = Probability.getNumerator() * NumCycles; 1558224145Sdim UnpredCost /= Probability.getDenominator(); 1559224145Sdim UnpredCost += 1; // The branch itself 1560224145Sdim UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1561218893Sdim 1562224145Sdim return (NumCycles + ExtraPredCycles) <= UnpredCost; 1563210299Sed} 1564218893Sdim 1565210299Sedbool ARMBaseInstrInfo:: 1566218893SdimisProfitableToIfCvt(MachineBasicBlock &TMBB, 1567218893Sdim unsigned TCycles, unsigned TExtra, 1568218893Sdim MachineBasicBlock &FMBB, 1569218893Sdim unsigned FCycles, unsigned FExtra, 1570224145Sdim const BranchProbability &Probability) const { 1571218893Sdim if (!TCycles || !FCycles) 1572218893Sdim return false; 1573218893Sdim 1574218893Sdim // Attempt to estimate the relative costs of predication versus branching. 1575224145Sdim unsigned TUnpredCost = Probability.getNumerator() * TCycles; 1576224145Sdim TUnpredCost /= Probability.getDenominator(); 1577226633Sdim 1578224145Sdim uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 1579224145Sdim unsigned FUnpredCost = Comp * FCycles; 1580224145Sdim FUnpredCost /= Probability.getDenominator(); 1581218893Sdim 1582224145Sdim unsigned UnpredCost = TUnpredCost + FUnpredCost; 1583224145Sdim UnpredCost += 1; // The branch itself 1584224145Sdim UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1585224145Sdim 1586224145Sdim return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 1587210299Sed} 1588210299Sed 1589243830Sdimbool 1590243830SdimARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 1591243830Sdim MachineBasicBlock &FMBB) const { 1592243830Sdim // Reduce false anti-dependencies to let Swift's out-of-order execution 1593243830Sdim // engine do its thing. 
  return Subtarget.isSwift();
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    // Not predicable: report AL with no condition register.
    PredReg = 0;
    return ARMCC::AL;
  }

  // Predicate operands come in pairs: condition code immediate, then the
  // condition register.
  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}

/// getMatchingCondBranchOpcode - Map an unconditional branch opcode to its
/// conditional counterpart. Asserts on any other opcode.
int llvm::getMatchingCondBranchOpcode(int Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  if (Opc == ARM::tB)
    return ARM::tBcc;
  if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
}

/// commuteInstruction - Handle commutable instructions.
MachineInstr *
ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case ARM::MOVCCr:
  case ARM::t2MOVCCr: {
    // MOVCC can be commuted by inverting the condition.
    unsigned PredReg = 0;
    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    // MOVCC AL can't be inverted. Shouldn't happen.
    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
      return NULL;
    // Let the generic implementation swap the two source operands.
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
    if (!MI)
      return NULL;
    // After swapping the MOVCC operands, also invert the condition.
    MI->getOperand(MI->findFirstPredOperandIdx())
      .setImm(ARMCC::getOppositeCondition(CC));
    return MI;
  }
  }
  return TargetInstrInfo::commuteInstruction(MI, NewMI);
}

/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
                                      const MachineRegisterInfo &MRI,
                                      const TargetInstrInfo *TII) {
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return 0;
  // The def must feed only the MOVCC, or predicating it would change other
  // users' values.
  if (!MRI.hasOneNonDBGUse(Reg))
    return 0;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return 0;
  // MI is folded into the MOVCC by predicating it.
  if (!MI->isPredicable())
    return 0;
  // Check if MI has any non-dead defs or physreg uses. This also detects
  // predicated instructions which will be reading CPSR.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return 0;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return 0;
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      return 0;
    if (MO.isDef() && !MO.isDead())
      return 0;
  }
  // Folding moves MI down to the MOVCC; it must not cross stores or have
  // other side effects.
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
    return 0;
  return MI;
}

/// analyzeSelect - Describe a MOVCC pseudo for the generic select optimizer:
/// report the true/false operand indices and the (condition, CPSR) pair in
/// Cond. Returns false to indicate the select was understood.
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     unsigned &TrueOp, unsigned &FalseOp,
                                     bool &Optimizable) const {
  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  // MOVCC operands:
  // 0: Def.
  // 1: True use.
  // 2: False use.
  // 3: Condition code.
  // 4: CPSR use.
  TrueOp = 1;
  FalseOp = 2;
  Cond.push_back(MI->getOperand(3));
  Cond.push_back(MI->getOperand(4));
  // We can always fold a def.
  Optimizable = true;
  return false;
}

/// optimizeSelect - Replace a MOVCC whose true (or false) operand is produced
/// by a foldable single-use instruction with a predicated copy of that
/// instruction. Returns the new instruction, or 0 if no operand could be
/// folded. The caller erases MI on success.
MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
                                               bool PreferFalse) const {
  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  // Try the false operand first; if that fails, fold the true operand and
  // remember to invert the condition.
  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return 0;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                      DefMI->getDesc(),
                                      MI->getOperand(0).getReg());

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.addOperand(DefMI->getOperand(i));

  // Install the MOVCC's predicate, inverted if we folded the true operand.
  unsigned CondCode = MI->getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.addOperand(MI->getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    AddDefaultCC(NewMI);

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
  FalseReg.setImplicit();
  NewMI.addOperand(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
// One row per flag-setting pseudo: the pseudo opcode and the real machine
// opcode it expands to once the CPSR def operand carries the 'S' bit.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;
  uint16_t MachineOpc;
};

static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  {ARM::ADDSri, ARM::ADDri},
  {ARM::ADDSrr, ARM::ADDrr},
  {ARM::ADDSrsi, ARM::ADDrsi},
  {ARM::ADDSrsr, ARM::ADDrsr},

  {ARM::SUBSri, ARM::SUBri},
  {ARM::SUBSrr, ARM::SUBrr},
  {ARM::SUBSrsi, ARM::SUBrsi},
  {ARM::SUBSrsr, ARM::SUBrsr},

  {ARM::RSBSri, ARM::RSBri},
  {ARM::RSBSrsi, ARM::RSBrsi},
  {ARM::RSBSrsr, ARM::RSBrsr},

  {ARM::t2ADDSri, ARM::t2ADDri},
  {ARM::t2ADDSrr, ARM::t2ADDrr},
  {ARM::t2ADDSrs, ARM::t2ADDrs},

  {ARM::t2SUBSri, ARM::t2SUBri},
  {ARM::t2SUBSrr, ARM::t2SUBrr},
  {ARM::t2SUBSrs, ARM::t2SUBrs},

  {ARM::t2RSBSri, ARM::t2RSBri},
  {ARM::t2RSBSrs, ARM::t2RSBrs},
};

/// convertAddSubFlagsOpcode - Return the real opcode for a flag-setting
/// add/sub pseudo, or 0 if OldOpc is not one of the mapped pseudos.
unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
    if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
      return AddSubFlagsOpcodeMap[i].MachineOpc;
  return 0;
}

/// emitARMRegPlusImmediate - Emit a chain of ADDri/SUBri instructions
/// materializing DestReg = BaseReg + NumBytes, splitting the immediate into
/// rotated 8-bit chunks that each fit an ARM modified-immediate field.
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                   unsigned DestReg, unsigned BaseReg, int NumBytes,
                                   ARMCC::CondCodes Pred, unsigned PredReg,
                                   const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  // Work with a positive magnitude and remember the direction.
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    // Peel off the largest chunk encodable as a rotated 8-bit immediate.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
      .setMIFlags(MIFlags);
    // Subsequent chunks accumulate on top of what was just produced.
    BaseReg = DestReg;
  }
}

/// rewriteARMFrameIndex - Replace the frame-index operand at FrameRegIdx with
/// FrameReg plus as much of Offset as the instruction's addressing mode can
/// encode. The unencodable remainder is returned in Offset; returns true when
/// the whole offset was folded.
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      // Negative offset: flip to SUBri and work with the magnitude.
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    // A load/store: decode the existing immediate offset according to the
    // instruction's addressing mode, then try to fold the combined offset
    // back in.
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold address comp. if opcode has offset bits
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            // Legacy addressing modes encode the direction as a bit above
            // the offset field rather than using a signed immediate.
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      // Leave the unencodable remainder for the caller.
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::
analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
               int &CmpMask, int &CmpValue) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::t2CMPri:
    // Compare register against immediate: no second register, no mask.
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI->getOperand(1).getImm();
    return true;
  case ARM::CMPrr:
  case ARM::t2CMPrr:
    // Register-register compare.
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = MI->getOperand(1).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    // Test-under-mask: the immediate is reported as the mask.
    SrcReg = MI->getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = MI->getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    // CommonUse selects whether we match on the AND's source (shared input
    // with the TST) or on its destination.
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  case ARM::COPY: {
    // Walk down one instruction which is potentially an 'and'.
    const MachineInstr &Copy = *MI;
    MachineBasicBlock::iterator AND(
      llvm::next(MachineBasicBlock::iterator(MI)));
    if (AND == MI->getParent()->end()) return false;
    // Retry on the next instruction, matching against the copy's destination.
    MI = AND;
    return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                             CmpMask, true);
  }
  }

  return false;
}

/// getSwappedCondition - assume the flags are set by MI(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by MI(b,a).
inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
  switch (CC) {
  // AL doubles as the "not swappable" sentinel for the remaining codes
  // (signed/unsigned overflow-sensitive ones are deliberately absent).
  default: return ARMCC::AL;
  case ARMCC::EQ: return ARMCC::EQ;
  case ARMCC::NE: return ARMCC::NE;
  case ARMCC::HS: return ARMCC::LS;
  case ARMCC::LO: return ARMCC::HI;
  case ARMCC::HI: return ARMCC::LO;
  case ARMCC::LS: return ARMCC::HS;
  case ARMCC::GE: return ARMCC::LE;
  case ARMCC::LT: return ARMCC::GT;
  case ARMCC::GT: return ARMCC::LT;
  case ARMCC::LE: return ARMCC::GE;
  }
}

/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
                                        unsigned SrcReg2, int ImmValue,
                                        MachineInstr *OI) {
  // CMPrr(r1,r2) is redundant with SUBrr(r1,r2) or the swapped SUBrr(r2,r1);
  // the caller is responsible for fixing up condition codes in the swapped
  // case (via getSwappedCondition).
  if ((CmpI->getOpcode() == ARM::CMPrr ||
       CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::SUBrr ||
       OI->getOpcode() == ARM::t2SUBrr) &&
      ((OI->getOperand(1).getReg() == SrcReg &&
        OI->getOperand(2).getReg() == SrcReg2) ||
       (OI->getOperand(1).getReg() == SrcReg2 &&
        OI->getOperand(2).getReg() == SrcReg)))
    return true;

  // CMPri(r, imm) is redundant with SUBri(r, imm) — same register, same
  // immediate (no swapped form for the immediate variant).
  if ((CmpI->getOpcode() == ARM::CMPri ||
       CmpI->getOpcode() == ARM::t2CMPri) &&
      (OI->getOpcode() == ARM::SUBri ||
       OI->getOpcode() == ARM::t2SUBri) &&
      OI->getOperand(1).getReg() == SrcReg &&
      OI->getOperand(2).getImm() == ImmValue)
    return true;
  return false;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
/// flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
                     int CmpMask, int CmpValue,
                     const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
      // The def itself is not usable; look for an un-predicated 'and' user of
      // SrcReg in the same block as the compare.
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *Sub = NULL;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = NULL;
  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri, we need to check Sub, thus we can't return here.
    if (CmpInstr->getOpcode() == ARM::CMPri ||
        CmpInstr->getOpcode() == ARM::t2CMPri)
      MI = NULL;
    else
      return false;
  }

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change. At the same time, search for Sub.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
        Instr.readsRegister(ARM::CPSR, TRI))
      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  // We can't use a predicated instruction - it doesn't always write the flags.
  if (isPredicated(MI))
    return false;

  // Only the opcodes below are known to have a flag-setting ('S') variant
  // whose optional cc_out operand can be toggled to CPSR.
  switch (MI->getOpcode()) {
  default: break;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri: {
    // Scan forward for the use of CPSR
    // When checking against MI: if it's a conditional code requires
    // checking of V bit, then this is not safe to do.
    // It is safe to remove CmpInstr if CPSR is redefined or killed.
    // If we are done with the basic block, we need to check whether CPSR is
    // live-out.
    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
        OperandsToUpdate;
    bool isSafe = false;
    I = CmpInstr;
    E = CmpInstr->getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands();
           !isSafe && IO != EO; ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        // A regmask that clobbers CPSR (e.g. a call) kills the old flags, so
        // everything past this point is safe.
        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
          isSafe = true;
          break;
        }
        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before CPSR.
        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
        if (Sub) {
          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
          if (NewCC == ARMCC::AL)
            return false;
          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
          // on CMP needs to be updated to be based on SUB.
          // Push the condition code operands to OperandsToUpdate.
          // If it is safe to remove CmpInstr, the condition code of these
          // operands will be modified.
          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
              Sub->getOperand(2).getReg() == SrcReg)
            OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
                                                      NewCC));
        }
        else
          switch (CC) {
          default:
            // CPSR can be used multiple times, we should continue.
            break;
          case ARMCC::VS:
          case ARMCC::VC:
          case ARMCC::GE:
          case ARMCC::LT:
          case ARMCC::GT:
          case ARMCC::LE:
            // These conditions read the V (overflow) flag, which an 'S'-suffix
            // logical/arithmetic op may not set the same way CMP does.
            return false;
          }
      }
    }

    // If CPSR is not killed nor re-defined, we should check whether it is
    // live-out. If it is live-out, do not optimize.
    if (!isSafe) {
      MachineBasicBlock *MBB = CmpInstr->getParent();
      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
           SE = MBB->succ_end(); SI != SE; ++SI)
        if ((*SI)->isLiveIn(ARM::CPSR))
          return false;
    }

    // Toggle the optional operand to CPSR.
    // NOTE(review): operand index 5 is assumed to be the optional cc_out
    // operand on every opcode listed above — confirm against the .td files
    // if new opcodes are added to the switch.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
    CmpInstr->eraseFromParent();

    // Modify the condition code of operands in OperandsToUpdate.
    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
    return true;
  }
  }

  return false;
}

bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
                                     MachineInstr *DefMI, unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
2299218893Sdim unsigned DefOpc = DefMI->getOpcode(); 2300218893Sdim if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 2301218893Sdim return false; 2302218893Sdim if (!DefMI->getOperand(1).isImm()) 2303218893Sdim // Could be t2MOVi32imm <ga:xx> 2304218893Sdim return false; 2305218893Sdim 2306218893Sdim if (!MRI->hasOneNonDBGUse(Reg)) 2307218893Sdim return false; 2308218893Sdim 2309234353Sdim const MCInstrDesc &DefMCID = DefMI->getDesc(); 2310234353Sdim if (DefMCID.hasOptionalDef()) { 2311234353Sdim unsigned NumOps = DefMCID.getNumOperands(); 2312234353Sdim const MachineOperand &MO = DefMI->getOperand(NumOps-1); 2313234353Sdim if (MO.getReg() == ARM::CPSR && !MO.isDead()) 2314234353Sdim // If DefMI defines CPSR and it is not dead, it's obviously not safe 2315234353Sdim // to delete DefMI. 2316234353Sdim return false; 2317234353Sdim } 2318234353Sdim 2319234353Sdim const MCInstrDesc &UseMCID = UseMI->getDesc(); 2320234353Sdim if (UseMCID.hasOptionalDef()) { 2321234353Sdim unsigned NumOps = UseMCID.getNumOperands(); 2322234353Sdim if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) 2323234353Sdim // If the instruction sets the flag, do not attempt this optimization 2324234353Sdim // since it may change the semantics of the code. 
2325234353Sdim return false; 2326234353Sdim } 2327234353Sdim 2328218893Sdim unsigned UseOpc = UseMI->getOpcode(); 2329218893Sdim unsigned NewUseOpc = 0; 2330218893Sdim uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); 2331218893Sdim uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 2332218893Sdim bool Commute = false; 2333218893Sdim switch (UseOpc) { 2334218893Sdim default: return false; 2335218893Sdim case ARM::SUBrr: 2336218893Sdim case ARM::ADDrr: 2337218893Sdim case ARM::ORRrr: 2338218893Sdim case ARM::EORrr: 2339218893Sdim case ARM::t2SUBrr: 2340218893Sdim case ARM::t2ADDrr: 2341218893Sdim case ARM::t2ORRrr: 2342218893Sdim case ARM::t2EORrr: { 2343218893Sdim Commute = UseMI->getOperand(2).getReg() != Reg; 2344218893Sdim switch (UseOpc) { 2345218893Sdim default: break; 2346218893Sdim case ARM::SUBrr: { 2347218893Sdim if (Commute) 2348218893Sdim return false; 2349218893Sdim ImmVal = -ImmVal; 2350218893Sdim NewUseOpc = ARM::SUBri; 2351218893Sdim // Fallthrough 2352218893Sdim } 2353218893Sdim case ARM::ADDrr: 2354218893Sdim case ARM::ORRrr: 2355218893Sdim case ARM::EORrr: { 2356218893Sdim if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) 2357218893Sdim return false; 2358218893Sdim SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 2359218893Sdim SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 2360218893Sdim switch (UseOpc) { 2361218893Sdim default: break; 2362218893Sdim case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; 2363218893Sdim case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; 2364218893Sdim case ARM::EORrr: NewUseOpc = ARM::EORri; break; 2365218893Sdim } 2366218893Sdim break; 2367218893Sdim } 2368218893Sdim case ARM::t2SUBrr: { 2369218893Sdim if (Commute) 2370218893Sdim return false; 2371218893Sdim ImmVal = -ImmVal; 2372218893Sdim NewUseOpc = ARM::t2SUBri; 2373218893Sdim // Fallthrough 2374218893Sdim } 2375218893Sdim case ARM::t2ADDrr: 2376218893Sdim case ARM::t2ORRrr: 2377218893Sdim case ARM::t2EORrr: { 2378218893Sdim if 
(!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 2379218893Sdim return false; 2380218893Sdim SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 2381218893Sdim SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 2382218893Sdim switch (UseOpc) { 2383218893Sdim default: break; 2384218893Sdim case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; 2385218893Sdim case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; 2386218893Sdim case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; 2387218893Sdim } 2388218893Sdim break; 2389218893Sdim } 2390218893Sdim } 2391218893Sdim } 2392218893Sdim } 2393218893Sdim 2394218893Sdim unsigned OpIdx = Commute ? 2 : 1; 2395218893Sdim unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); 2396218893Sdim bool isKill = UseMI->getOperand(OpIdx).isKill(); 2397218893Sdim unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); 2398218893Sdim AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), 2399234353Sdim UseMI, UseMI->getDebugLoc(), 2400218893Sdim get(NewUseOpc), NewReg) 2401218893Sdim .addReg(Reg1, getKillRegState(isKill)) 2402218893Sdim .addImm(SOImmValV1))); 2403218893Sdim UseMI->setDesc(get(NewUseOpc)); 2404218893Sdim UseMI->getOperand(1).setReg(NewReg); 2405218893Sdim UseMI->getOperand(1).setIsKill(); 2406218893Sdim UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); 2407218893Sdim DefMI->eraseFromParent(); 2408218893Sdim return true; 2409218893Sdim} 2410218893Sdim 2411243830Sdimstatic unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, 2412243830Sdim const MachineInstr *MI) { 2413243830Sdim switch (MI->getOpcode()) { 2414243830Sdim default: { 2415243830Sdim const MCInstrDesc &Desc = MI->getDesc(); 2416243830Sdim int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); 2417243830Sdim assert(UOps >= 0 && "bad # UOps"); 2418243830Sdim return UOps; 2419243830Sdim } 2420243830Sdim 2421243830Sdim case ARM::LDRrs: 2422243830Sdim case ARM::LDRBrs: 2423243830Sdim case ARM::STRrs: 2424243830Sdim case 
ARM::STRBrs: {
    // Register-offset form: single micro-op only for a plain add with
    // no shift or an LSL of 1..3; anything else costs an extra op.
    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    // Immediate-offset form (no offset register) is a single micro-op.
    if (!MI->getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI->getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    // Extra micro-op when the loaded register equals the offset register.
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 4 : 3;
  }

  case ARM::LDR_PRE_REG:
  case ARM::LDRB_PRE_REG: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rt == Rm)
      return 3;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::STR_PRE_REG:
  case ARM::STRB_PRE_REG: {
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 2;
    return 3;
  }

  case ARM::LDRH_PRE:
  case ARM::STRH_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (!Rm)
      return 2;
    if (Rt == Rm)
      return 3;
    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
      ? 3 : 2;
  }

  case ARM::LDR_POST_REG:
  case ARM::LDRB_POST_REG:
  case ARM::LDRH_POST: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    return (Rt == Rm) ? 3 : 2;
  }

  case ARM::LDR_PRE_IMM:
  case ARM::LDRB_PRE_IMM:
  case ARM::LDR_POST_IMM:
  case ARM::LDRB_POST_IMM:
  case ARM::STRB_POST_IMM:
  case ARM::STRB_POST_REG:
  case ARM::STRB_PRE_IMM:
  case ARM::STRH_POST:
  case ARM::STR_POST_IMM:
  case ARM::STR_POST_REG:
  case ARM::STR_PRE_IMM:
    return 2;

  case ARM::LDRSB_PRE:
  case ARM::LDRSH_PRE: {
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm == 0)
      return 3;
    unsigned Rt = MI->getOperand(0).getReg();
    if (Rt == Rm)
      return 4;
    unsigned ShOpVal = MI->getOperand(4).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 3;
    return 4;
  }

  case ARM::LDRD: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::STRD: {
    unsigned Rm = MI->getOperand(3).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
    return 2;
  }

  case ARM::LDRD_POST:
  case ARM::t2LDRD_POST:
    return 3;

  case ARM::STRD_POST:
  case ARM::t2STRD_POST:
    return 4;

  case ARM::LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::t2LDRD_PRE: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(3).getReg();
    return (Rt == Rn) ? 4 : 3;
  }

  case ARM::STRD_PRE: {
    unsigned Rm = MI->getOperand(4).getReg();
    if (Rm)
      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
    return 3;
  }

  case ARM::t2STRD_PRE:
    return 3;

  case ARM::t2LDR_POST:
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBpci:
  case ARM::t2LDRSBs:
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRSBT:
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHpci:
  case ARM::t2LDRSHs:
    return 2;

  case ARM::t2LDRDi8: {
    unsigned Rt = MI->getOperand(0).getReg();
    unsigned Rn = MI->getOperand(2).getReg();
    return (Rt == Rn) ? 3 : 2;
  }

  case ARM::t2STRB_POST:
  case ARM::t2STRB_PRE:
  case ARM::t2STRBs:
  case ARM::t2STRDi8:
  case ARM::t2STRH_POST:
  case ARM::t2STRH_PRE:
  case ARM::t2STRHs:
  case ARM::t2STR_POST:
  case ARM::t2STR_PRE:
  case ARM::t2STRs:
    return 2;
  }
}

// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
// operands. This is much more dangerous than using the MachineMemOperand
// sizes because CodeGen passes can insert/remove optional machine operands. In
// fact, it's totally incorrect for preRA passes and appears to be wrong for
// postRA passes as well.
//
// 2) getNumLDMAddresses is only used by the scheduling machine model and any
// machine model that calls this should handle the unknown (zero size) case.
//
// Long term, we should require a target hook that verifies MachineMemOperand
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
  // Sum the byte sizes of all memory operands and convert to 32-bit words;
  // returns 0 when the instruction carries no memory operands.
  unsigned Size = 0;
  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
       E = MI->memoperands_end(); I != E; ++I) {
    Size += (*I)->getSize();
  }
  return Size / 4;
}

unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  // Without itinerary data, conservatively model every instruction as one
  // micro-op.
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    // Swift load/store micro-op counts depend on addressing mode details the
    // itinerary can't express; defer to the dedicated helper.
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  // A negative itinerary count means "variable"; compute it per opcode below.
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle.
  // For VFP / NEON load / store multiple, the formula is
  // (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // The variadic register list is everything past the fixed operands.
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isSwift()) {
      // One for address computation, one for each ld / st.
      int UOps = 1 + NumRegs;
      switch (Opc) {
      default: break;
      case ARM::VLDMDIA_UPD:
      case ARM::VLDMDDB_UPD:
      case ARM::VLDMSIA_UPD:
      case ARM::VLDMSDB_UPD:
      case ARM::VSTMDIA_UPD:
      case ARM::VSTMDDB_UPD:
      case ARM::VSTMSIA_UPD:
      case ARM::VSTMSDB_UPD:
      case ARM::LDMIA_UPD:
      case ARM::LDMDA_UPD:
      case ARM::LDMDB_UPD:
      case ARM::LDMIB_UPD:
      case ARM::STMIA_UPD:
      case ARM::STMDA_UPD:
      case ARM::STMDB_UPD:
      case ARM::STMIB_UPD:
      case ARM::tLDMIA_UPD:
      case ARM::tSTMIA_UPD:
      case ARM::t2LDMIA_UPD:
      case ARM::t2LDMDB_UPD:
      case ARM::t2STMIA_UPD:
      case ARM::t2STMDB_UPD:
        ++UOps; // One for base register writeback.
        break;
      case ARM::LDMIA_RET:
      case ARM::tPOP_RET:
      case ARM::t2LDMIA_RET:
        UOps += 2; // One for base reg wb, one for write to pc.
        break;
      }
      return UOps;
    } else if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      int A8UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++A8UOps;
      return A8UOps;
    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
      // NOTE(review): the isSwift() test here is unreachable — the Swift
      // branch above always returns.
      int A9UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++A9UOps;
      return A9UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}

int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  // Position of this def within the VLDM register list (1-based); zero or
  // negative means the def is a fixed operand, i.e. the writeback.
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLDMSIA:
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    // then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &DefMCID,
                                 unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  // Position of this def within the LDM register list (1-based); zero or
  // negative means the def is a fixed operand, i.e. the writeback.
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    if (DefCycle < 1)
      DefCycle = 1;
    // Result latency is issue cycle + 2: E2.
    DefCycle += 2;
  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
    DefCycle = (RegNo / 2);
    // If there are odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
2879218893Sdim DefCycle = RegNo + 2; 2880218893Sdim } 2881218893Sdim 2882218893Sdim return DefCycle; 2883218893Sdim} 2884218893Sdim 2885218893Sdimint 2886218893SdimARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 2887224145Sdim const MCInstrDesc &UseMCID, 2888218893Sdim unsigned UseClass, 2889218893Sdim unsigned UseIdx, unsigned UseAlign) const { 2890224145Sdim int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2891218893Sdim if (RegNo <= 0) 2892218893Sdim return ItinData->getOperandCycle(UseClass, UseIdx); 2893218893Sdim 2894218893Sdim int UseCycle; 2895218893Sdim if (Subtarget.isCortexA8()) { 2896218893Sdim // (regno / 2) + (regno % 2) + 1 2897218893Sdim UseCycle = RegNo / 2 + 1; 2898218893Sdim if (RegNo % 2) 2899218893Sdim ++UseCycle; 2900243830Sdim } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 2901218893Sdim UseCycle = RegNo; 2902218893Sdim bool isSStore = false; 2903218893Sdim 2904224145Sdim switch (UseMCID.getOpcode()) { 2905218893Sdim default: break; 2906218893Sdim case ARM::VSTMSIA: 2907218893Sdim case ARM::VSTMSIA_UPD: 2908218893Sdim case ARM::VSTMSDB_UPD: 2909218893Sdim isSStore = true; 2910218893Sdim break; 2911218893Sdim } 2912218893Sdim 2913218893Sdim // If there are odd number of 'S' registers or if it's not 64-bit aligned, 2914218893Sdim // then it takes an extra cycle. 2915218893Sdim if ((isSStore && (RegNo % 2)) || UseAlign < 8) 2916218893Sdim ++UseCycle; 2917218893Sdim } else { 2918218893Sdim // Assume the worst. 
2919218893Sdim UseCycle = RegNo + 2; 2920218893Sdim } 2921218893Sdim 2922218893Sdim return UseCycle; 2923218893Sdim} 2924218893Sdim 2925218893Sdimint 2926218893SdimARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 2927224145Sdim const MCInstrDesc &UseMCID, 2928218893Sdim unsigned UseClass, 2929218893Sdim unsigned UseIdx, unsigned UseAlign) const { 2930224145Sdim int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2931218893Sdim if (RegNo <= 0) 2932218893Sdim return ItinData->getOperandCycle(UseClass, UseIdx); 2933218893Sdim 2934218893Sdim int UseCycle; 2935218893Sdim if (Subtarget.isCortexA8()) { 2936218893Sdim UseCycle = RegNo / 2; 2937218893Sdim if (UseCycle < 2) 2938218893Sdim UseCycle = 2; 2939218893Sdim // Read in E3. 2940218893Sdim UseCycle += 2; 2941243830Sdim } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 2942218893Sdim UseCycle = (RegNo / 2); 2943218893Sdim // If there are odd number of registers or if it's not 64-bit aligned, 2944218893Sdim // then it takes an extra AGU (Address Generation Unit) cycle. 2945218893Sdim if ((RegNo % 2) || UseAlign < 8) 2946218893Sdim ++UseCycle; 2947218893Sdim } else { 2948218893Sdim // Assume the worst. 
    UseCycle = 1;
  }
  return UseCycle;
}

/// getOperandLatency (itinerary variant) - Compute def-to-use latency from the
/// itinerary data, handling variable_ops load/store-multiple instructions
/// whose per-register cycles are not in the itinerary. Returns -1 if the
/// latency cannot be determined.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MCInstrDesc &DefMCID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const MCInstrDesc &UseMCID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  // Fast path: both operand indices are covered by the itinerary tables.
  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    // Integer LDM may be able to forward its result (checked below).
    LdmBypass = 1;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // Latency = def issue cycle - use read cycle + 1, reduced by one if the
  // pipeline can forward the value.
  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      --UseCycle;
    }
  }

  return UseCycle;
}

/// getBundledDefMI - Walk backwards inside the bundle containing MI to find
/// the instruction that defines Reg. Returns that instruction, sets DefIdx to
/// the defining operand's index, and Dist to the number of instructions
/// stepped over. Asserts if no definition is found in the bundle.
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &DefIdx, unsigned &Dist) {
  Dist = 0;

  // Start from the last instruction of the bundle and scan backwards.
  MachineBasicBlock::const_iterator I = MI; ++I;
  MachineBasicBlock::const_instr_iterator II =
    llvm::prior(I.getInstrIterator());
  assert(II->isInsideBundle() && "Empty bundle?");

  int Idx = -1;
  while (II->isInsideBundle()) {
    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
    if (Idx != -1)
      break;
    --II;
    ++Dist;
  }

  assert(Idx != -1 && "Cannot find bundled definition!");
  DefIdx = Idx;
  return II;
}

/// getBundledUseMI - Walk forwards inside the bundle starting after MI to
/// find the first instruction that uses Reg. Returns that instruction (or
/// null if none), setting UseIdx to the using operand's index and Dist to the
/// number of non-IT instructions stepped over.
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
                                           const MachineInstr *MI, unsigned Reg,
                                           unsigned &UseIdx, unsigned &Dist) {
  Dist = 0;

  MachineBasicBlock::const_instr_iterator II = MI; ++II;
  assert(II->isInsideBundle() && "Empty bundle?");
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();

  // FIXME: This doesn't properly handle multiple uses.
  int Idx = -1;
  while (II != E && II->isInsideBundle()) {
    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
    if (Idx != -1)
      break;
    // IT instructions are not counted towards the bundle distance.
    if (II->getOpcode() != ARM::t2IT)
      ++Dist;
    ++II;
  }

  if (Idx == -1) {
    // No use of Reg inside the bundle.
    Dist = 0;
    return 0;
  }

  UseIdx = Idx;
  return II;
}

/// Return the number of cycles to add to (or subtract from) the static
/// itinerary based on the def opcode and alignment. The caller will ensure that
/// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
                            const MachineInstr *DefMI,
                            const MCInstrDesc *DefMCID, unsigned DefAlign) {
  int Adjust = 0;
  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // Operand 3 holds the addrmode2 shifter encoding.
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Adjust;
      break;
    }
    }
  } else if (Subtarget.isSwift()) {
    // FIXME: Properly handle all of the latency adjustments for address
    // writeback.
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      // Swift: add-mode loads with no shift or lsl #1-#3 are two cycles
      // cheaper; lsr #1 is one cycle cheaper.
      if (!isSub &&
          (ShImm == 0 ||
           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
        Adjust -= 2;
      else if (!isSub &&
               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
        --Adjust;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
        Adjust -= 2;
      break;
    }
    }
  }

  // On A9-like cores, under-aligned (< 64-bit) NEON loads cost one extra
  // cycle.
  if (DefAlign < 8 && Subtarget.isLikeA9()) {
    switch (DefMCID->getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8wb_fixed:
    case ARM::VLD1q16wb_fixed:
    case ARM::VLD1q32wb_fixed:
    case ARM::VLD1q64wb_fixed:
    case ARM::VLD1q8wb_register:
    case ARM::VLD1q16wb_register:
    case ARM::VLD1q32wb_register:
    case ARM::VLD1q64wb_register:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
    case ARM::VLD2d8wb_fixed:
    case ARM::VLD2d16wb_fixed:
    case ARM::VLD2d32wb_fixed:
    case ARM::VLD2q8wb_fixed:
    case ARM::VLD2q16wb_fixed:
    case ARM::VLD2q32wb_fixed:
    case ARM::VLD2d8wb_register:
    case ARM::VLD2d16wb_register:
    case ARM::VLD2d32wb_register:
    case ARM::VLD2q8wb_register:
    case ARM::VLD2q16wb_register:
    case ARM::VLD2q32wb_register:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
    case ARM::VLD1d64Twb_fixed:
    case ARM::VLD1d64Twb_register:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
    case ARM::VLD4d8:
    case ARM::VLD4d16:
    case ARM::VLD4d32:
    case ARM::VLD1d64Q:
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
    case ARM::VLD1d64Qwb_fixed:
    case ARM::VLD1d64Qwb_register:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8wb_fixed:
    case ARM::VLD1DUPq16wb_fixed:
    case ARM::VLD1DUPq32wb_fixed:
    case ARM::VLD1DUPq8wb_register:
    case ARM::VLD1DUPq16wb_register:
    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8wb_fixed:
    case ARM::VLD2DUPd16wb_fixed:
    case ARM::VLD2DUPd32wb_fixed:
    case ARM::VLD2DUPd8wb_register:
    case ARM::VLD2DUPd16wb_register:
    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
    case ARM::VLD4DUPd8_UPD:
    case ARM::VLD4DUPd16_UPD:
    case ARM::VLD4DUPd32_UPD:
    case ARM::VLD1LNd8:
    case ARM::VLD1LNd16:
    case ARM::VLD1LNd32:
    case ARM::VLD1LNd8_UPD:
    case ARM::VLD1LNd16_UPD:
    case ARM::VLD1LNd32_UPD:
    case ARM::VLD2LNd8:
    case ARM::VLD2LNd16:
    case ARM::VLD2LNd32:
    case ARM::VLD2LNq16:
    case ARM::VLD2LNq32:
    case ARM::VLD2LNd8_UPD:
    case ARM::VLD2LNd16_UPD:
    case ARM::VLD2LNd32_UPD:
    case ARM::VLD2LNq16_UPD:
    case ARM::VLD2LNq32_UPD:
    case ARM::VLD4LNd8:
    case ARM::VLD4LNd16:
    case ARM::VLD4LNd32:
    case ARM::VLD4LNq16:
    case ARM::VLD4LNq32:
    case ARM::VLD4LNd8_UPD:
    case ARM::VLD4LNd16_UPD:
    case ARM::VLD4LNd32_UPD:
    case ARM::VLD4LNq16_UPD:
    case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increases by one.
      ++Adjust;
      break;
    }
  }
  return Adjust;
}



/// getOperandLatency (MachineInstr variant) - Compute the latency between
/// DefMI's operand DefIdx and UseMI's operand UseIdx, unbundling bundled
/// instructions and applying dynamic opcode/alignment adjustments.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr *DefMI, unsigned DefIdx,
                                    const MachineInstr *UseMI,
                                    unsigned UseIdx) const {
  // No operand latency. The caller may fall back to getInstrLatency.
  if (!ItinData || ItinData->isEmpty())
    return -1;

  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  unsigned Reg = DefMO.getReg();
  const MCInstrDesc *DefMCID = &DefMI->getDesc();
  const MCInstrDesc *UseMCID = &UseMI->getDesc();

  // If the def is a bundle, resolve it to the defining instruction inside the
  // bundle; DefAdj accumulates the extra in-bundle distance.
  unsigned DefAdj = 0;
  if (DefMI->isBundle()) {
    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
    DefMCID = &DefMI->getDesc();
  }
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
    return 1;
  }

  // Likewise resolve a bundled use to the using instruction inside it.
  unsigned UseAdj = 0;
  if (UseMI->isBundle()) {
    unsigned NewUseIdx;
    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
                                                   Reg, NewUseIdx, UseAdj);
    if (!NewUseMI)
      return -1;

    UseMI = NewUseMI;
    UseIdx = NewUseIdx;
    UseMCID = &UseMI->getDesc();
  }

  if (Reg == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isLikeA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMI->isBranch())
      return 0;

    // Otherwise it takes the instruction latency (generally one).
    unsigned Latency = getInstrLatency(ItinData, DefMI);

    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
    // its uses. Instructions which are otherwise scheduled between them may
    // incur a code size penalty (not able to use the CPSR setting 16-bit
    // instructions).
    if (Latency > 0 && Subtarget.isThumb2()) {
      const MachineFunction *MF = DefMI->getParent()->getParent();
      if (MF->getFunction()->getAttributes().
            hasAttribute(AttributeSet::FunctionIndex,
                         Attribute::OptimizeForSize))
        --Latency;
    }
    return Latency;
  }

  // Implicit operands have no itinerary entry; let the caller fall back.
  if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
    return -1;

  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;

  // Get the itinerary's latency if possible, and handle variable_ops.
  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
                                  *UseMCID, UseIdx, UseAlign);
  // Unable to find operand latency. The caller may resort to getInstrLatency.
  if (Latency < 0)
    return Latency;

  // Adjust for IT block position.
  int Adj = DefAdj + UseAdj;

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    return Latency + Adj;
  }
  // Return the itinerary latency, which may be zero but not less than zero.
  return Latency;
}

/// getOperandLatency (SDNode variant) - Pre-RA scheduling latency between a
/// defining and a using SelectionDAG node, with the same dynamic opcode and
/// alignment adjustments as the MachineInstr variant.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());

  if (isZeroCost(DefMCID.Opcode))
    return 0;

  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    // Use is not a machine node (e.g. a copy). Discount the latency by the
    // amount the target forwards, clamped to a minimum of one cycle.
    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    if (Subtarget.isLikeA9() || Subtarget.isSwift())
      return Latency <= 2 ? 1 : Latency - 1;
    else
      return Latency <= 3 ? 1 : Latency - 2;
  }

  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
  unsigned DefAlign = !DefMN->memoperands_empty()
    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
  unsigned UseAlign = !UseMN->memoperands_empty()
    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
                                  UseMCID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
    // FIXME: Properly handle all of the latency adjustments for address
    // writeback.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        Latency -= 2;
      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl 0-3 only.
      Latency -= 2;
      break;
    }
    }
  }

  // On A9-like cores, under-aligned (< 64-bit) NEON loads cost one extra
  // cycle.
  if (DefAlign < 8 && Subtarget.isLikeA9())
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8wb_register:
    case ARM::VLD1q16wb_register:
    case ARM::VLD1q32wb_register:
    case ARM::VLD1q64wb_register:
    case ARM::VLD1q8wb_fixed:
    case ARM::VLD1q16wb_fixed:
    case ARM::VLD1q32wb_fixed:
    case ARM::VLD1q64wb_fixed:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
    case ARM::VLD2d8wb_fixed:
    case ARM::VLD2d16wb_fixed:
    case ARM::VLD2d32wb_fixed:
    case ARM::VLD2q8PseudoWB_fixed:
    case ARM::VLD2q16PseudoWB_fixed:
    case ARM::VLD2q32PseudoWB_fixed:
    case ARM::VLD2d8wb_register:
    case ARM::VLD2d16wb_register:
    case ARM::VLD2d32wb_register:
    case ARM::VLD2q8PseudoWB_register:
    case ARM::VLD2q16PseudoWB_register:
    case ARM::VLD2q32PseudoWB_register:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
    case ARM::VLD3q8Pseudo_UPD:
    case ARM::VLD3q16Pseudo_UPD:
    case ARM::VLD3q32Pseudo_UPD:
    case ARM::VLD3q8oddPseudo:
    case ARM::VLD3q16oddPseudo:
    case ARM::VLD3q32oddPseudo:
    case ARM::VLD3q8oddPseudo_UPD:
    case ARM::VLD3q16oddPseudo_UPD:
    case ARM::VLD3q32oddPseudo_UPD:
    case ARM::VLD4d8Pseudo:
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d64QPseudo:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
    case ARM::VLD4q8Pseudo_UPD:
    case ARM::VLD4q16Pseudo_UPD:
    case ARM::VLD4q32Pseudo_UPD:
    case ARM::VLD4q8oddPseudo:
    case ARM::VLD4q16oddPseudo:
    case ARM::VLD4q32oddPseudo:
    case ARM::VLD4q8oddPseudo_UPD:
    case ARM::VLD4q16oddPseudo_UPD:
    case ARM::VLD4q32oddPseudo_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8wb_fixed:
    case ARM::VLD1DUPq16wb_fixed:
    case ARM::VLD1DUPq32wb_fixed:
    case ARM::VLD1DUPq8wb_register:
    case ARM::VLD1DUPq16wb_register:
    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8wb_fixed:
    case ARM::VLD2DUPd16wb_fixed:
    case ARM::VLD2DUPd32wb_fixed:
    case ARM::VLD2DUPd8wb_register:
    case ARM::VLD2DUPd16wb_register:
    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8Pseudo:
    case ARM::VLD4DUPd16Pseudo:
    case ARM::VLD4DUPd32Pseudo:
    case ARM::VLD4DUPd8Pseudo_UPD:
    case ARM::VLD4DUPd16Pseudo_UPD:
    case ARM::VLD4DUPd32Pseudo_UPD:
    case ARM::VLD1LNq8Pseudo:
    case ARM::VLD1LNq16Pseudo:
    case ARM::VLD1LNq32Pseudo:
    case ARM::VLD1LNq8Pseudo_UPD:
    case ARM::VLD1LNq16Pseudo_UPD:
    case ARM::VLD1LNq32Pseudo_UPD:
    case ARM::VLD2LNd8Pseudo:
    case ARM::VLD2LNd16Pseudo:
    case ARM::VLD2LNd32Pseudo:
    case ARM::VLD2LNq16Pseudo:
    case ARM::VLD2LNq32Pseudo:
    case ARM::VLD2LNd8Pseudo_UPD:
    case ARM::VLD2LNd16Pseudo_UPD:
    case ARM::VLD2LNd32Pseudo_UPD:
    case ARM::VLD2LNq16Pseudo_UPD:
    case ARM::VLD2LNq32Pseudo_UPD:
    case ARM::VLD4LNd8Pseudo:
    case ARM::VLD4LNd16Pseudo:
    case ARM::VLD4LNd32Pseudo:
    case ARM::VLD4LNq16Pseudo:
    case ARM::VLD4LNq32Pseudo:
    case ARM::VLD4LNd8Pseudo_UPD:
    case ARM::VLD4LNd16Pseudo_UPD:
    case ARM::VLD4LNd32Pseudo_UPD:
    case ARM::VLD4LNq16Pseudo_UPD:
    case ARM::VLD4LNq32Pseudo_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increases by one.
      ++Latency;
      break;
    }

  return Latency;
}

/// getInstrLatency - Return the whole-instruction latency of MI, summing the
/// members of a bundle (ignoring IT instructions) and applying dynamic
/// def-side adjustments. PredCost, if non-null, is set to 1 for calls and
/// CPSR-defining instructions.
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                           const MachineInstr *MI,
                                           unsigned *PredCost) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 1;

  // An instruction scheduler typically runs on unbundled instructions, however
  // other passes may query the latency of a bundled instruction.
  if (MI->isBundle()) {
    // Sum the latencies of the bundled instructions, skipping IT markers.
    unsigned Latency = 0;
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      if (I->getOpcode() != ARM::t2IT)
        Latency += getInstrLatency(ItinData, I, PredCost);
    }
    return Latency;
  }

  const MCInstrDesc &MCID = MI->getDesc();
  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions, this apparently increases their latencies.
    *PredCost = 1;
  }
  // Be sure to call getStageLatency for an empty itinerary in case it has a
  // valid MinLatency property.
  if (!ItinData)
    return MI->mayLoad() ? 3 : 1;

  unsigned Class = MCID.getSchedClass();

  // For instructions with variable uops, use uops as latency.
  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    return getNumMicroOps(ItinData, MI);

  // For the common case, fall back on the itinerary's latency.
  unsigned Latency = ItinData->getStageLatency(Class);

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  unsigned DefAlign = MI->hasOneMemOperand()
    ? (*MI->memoperands_begin())->getAlignment() : 0;
  int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
  if (Adj >= 0 || (int)Latency > -Adj) {
    return Latency + Adj;
  }
  return Latency;
}

/// getInstrLatency (SDNode variant) - Latency of a SelectionDAG machine node;
/// VLDMQIA / VSTMQIA are special-cased to 2 cycles, everything else uses the
/// itinerary stage latency.
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      SDNode *Node) const {
  if (!Node->isMachineOpcode())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Opcode = Node->getMachineOpcode();
  switch (Opcode) {
  default:
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;
  }
}

/// hasHighOperandLatency - Return true if the def/use pair is expensive
/// enough (VFP/NEON with latency > 3, or any VFP on Cortex-A8) that machine
/// LICM should prefer hoisting the def.
bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // CortexA8 VFP instructions are not pipelined.
    return true;

  // Hoist VFP / NEON instructions with 4 or higher latency.
  // Compute the operand latency; a negative result means the itinerary has no
  // per-operand information, so fall back to the whole-instruction latency.
  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx,
                                      /*FindMin=*/false);
  if (Latency < 0)
    Latency = getInstrLatency(ItinData, DefMI);
  if (Latency <= 3)
    return false;
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}

/// hasLowDefLatency - Return true if DefMI is a general-domain instruction
/// whose DefIdx result is available within two cycles per the itinerary.
bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    // -1 means the operand cycle is unknown; treat that as not low-latency.
    return (DefCycle != -1 && DefCycle <= 2);
  }
  return false;
}

/// verifyInstruction - Machine verifier hook.  The pseudo flag-setting
/// opcodes recognized by convertAddSubFlagsOpcode() must all be gone by the
/// time the verifier runs; report any survivor as an error.
bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
                                         StringRef &ErrInfo) const {
  if (convertAddSubFlagsOpcode(MI->getOpcode())) {
    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
    return false;
  }
  return true;
}

/// isFpMLxInstruction - Look Opcode up in the MLA / MLS table.  On a hit,
/// return the expanded multiply and add/sub opcodes together with the
/// NegAcc / HasLane flags recorded for that entry.
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
                                     unsigned &AddSubOpc,
                                     bool &NegAcc, bool &HasLane) const {
  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
  if (I == MLxEntryMap.end())
    return false;

  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
  MulOpc = Entry.MulOpc;
  AddSubOpc = Entry.AddSubOpc;
  NegAcc = Entry.NegAcc;
  HasLane = Entry.HasLane;
  return true;
}

//===----------------------------------------------------------------------===//
// Execution domains.
//===----------------------------------------------------------------------===//
//
// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
// and some can go down both. The vmov instructions go down the VFP pipeline,
// but they can be changed to vorr equivalents that are executed by the NEON
// pipeline.
//
// We use the following execution domain numbering:
//
enum ARMExeDomain {
  ExeGeneric = 0,
  ExeVFP = 1,
  ExeNEON = 2
};
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
  // if they are not predicated.
  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // CortexA9 is particularly picky about mixing the two and wants these
  // converted.
  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
      (MI->getOpcode() == ARM::VMOVRS ||
       MI->getOpcode() == ARM::VMOVSR ||
       MI->getOpcode() == ARM::VMOVS))
    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));

  // No other instructions can be swizzled, so just determine their domain.
  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;

  if (Domain & ARMII::DomainNEON)
    return std::make_pair(ExeNEON, 0);

  // Certain instructions can go either way on Cortex-A8.
  // Treat them as NEON instructions.
  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
    return std::make_pair(ExeNEON, 0);

  if (Domain & ARMII::DomainVFP)
    return std::make_pair(ExeVFP, 0);

  return std::make_pair(ExeGeneric, 0);
}

/// getCorrespondingDRegAndLane - Return the D register that contains SReg,
/// and set Lane to 0 or 1 depending on which half of the D register SReg
/// occupies (ssub_0 or ssub_1).
static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
                                            unsigned SReg, unsigned &Lane) {
  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
  Lane = 0;

  if (DReg != ARM::NoRegister)
    return DReg;

  // SReg was not the low half; it must be the high half of some D register.
  Lane = 1;
  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);

  assert(DReg && "S-register with no D super-register?");
  return DReg;
}

/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
/// set ImplicitSReg to a register number that must be marked as implicit-use or
/// zero if no register needs to be defined as implicit-use.
  ImplicitSReg = 0;
  return true;
}

/// setExecutionDomain - Rewrite MI into the requested execution domain.
/// Only the NEON domain requires work here: VFP moves (VMOVD, VMOVRS,
/// VMOVSR, VMOVS) are re-written as equivalent NEON instructions by
/// replacing the descriptor and rebuilding the operand list in place.
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  unsigned DstReg, SrcReg, DReg;
  unsigned Lane;
  // MIB wraps MI itself, so the addReg/addImm calls below append operands to
  // the instruction being rewritten.
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  switch (MI->getOpcode()) {
    default:
      llvm_unreachable("cannot handle opcode!");
      break;
    case ARM::VMOVD:
      if (Domain != ExeNEON)
        break;

      // Zap the predicate operands.
      assert(!isPredicated(MI) && "Cannot predicate a VORRd");

      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      // Strip all existing operands before rebuilding the NEON form.
      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
      MI->setDesc(get(ARM::VORRd));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                        .addReg(SrcReg)
                        .addReg(SrcReg));
      break;
    case ARM::VMOVRS:
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");

      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);

      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
      // Note that DSrc has been widened and the other lane may be undef, which
      // contaminates the entire register.
      MI->setDesc(get(ARM::VGETLNi32));
      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
                        .addReg(DReg, RegState::Undef)
                        .addImm(Lane));

      // The old source should be an implicit use, otherwise we might think it
      // was dead before here.
      MIB.addReg(SrcReg, RegState::Implicit);
      break;
    case ARM::VMOVSR: {
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");

      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);

      // Bail out (leaving MI untouched) if liveness of the sibling S-register
      // cannot be determined.
      unsigned ImplicitSReg;
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
      // Again DDst may be undefined at the beginning of this instruction.
      MI->setDesc(get(ARM::VSETLNi32));
      MIB.addReg(DReg, RegState::Define)
         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
         .addReg(SrcReg)
         .addImm(Lane);
      AddDefaultPred(MIB);

      // The narrower destination must be marked as set to keep previous chains
      // in place.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
    case ARM::VMOVS: {
      if (Domain != ExeNEON)
        break;

      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

      // Bail out (leaving MI untouched) if liveness of the sibling S-register
      // cannot be determined.
      unsigned ImplicitSReg;
      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
        break;

      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);

      if (DSrc == DDst) {
        // Source and destination lanes live in the same D register: a single
        // lane-duplicate does the move.
        // Destination can be:
        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
        MI->setDesc(get(ARM::VDUPLN32d));
        MIB.addReg(DDst, RegState::Define)
           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
           .addImm(SrcLane);
        AddDefaultPred(MIB);

        // Neither the source or the destination are naturally represented any
        // more, so add them in manually.
        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
        MIB.addReg(SrcReg, RegState::Implicit);
        if (ImplicitSReg != 0)
          MIB.addReg(ImplicitSReg, RegState::Implicit);
        break;
      }

      // In general there's no single instruction that can perform an S <-> S
      // move in NEON space, but a pair of VEXT instructions *can* do the
      // job. It turns out that the VEXTs needed will only use DSrc once, with
      // the position based purely on the combination of lane-0 and lane-1
      // involved. For example
      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
      //
      // Pattern of the MachineInstrs is:
      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
      MachineInstrBuilder NewMIB;
      // First VEXT is a brand new instruction inserted before MI; MI itself
      // is rewritten below to become the second VEXT.
      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                       get(ARM::VEXTd32), DDst);

      // On the first instruction, both DSrc and DDst may be <undef> if present.
      // Specifically when the original instruction didn't have them as an
      // <imp-use>.
      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
      bool CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
      CurUndef = !MI->readsRegister(CurReg, TRI);
      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

      NewMIB.addImm(1);
      AddDefaultPred(NewMIB);

      if (SrcLane == DstLane)
        NewMIB.addReg(SrcReg, RegState::Implicit);

      MI->setDesc(get(ARM::VEXTd32));
      MIB.addReg(DDst, RegState::Define);

      // On the second instruction, DDst has definitely been defined above, so
      // it is not <undef>. DSrc, if present, can be <undef> as above.
      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
      MIB.addReg(CurReg, getUndefRegState(CurUndef));

      MIB.addImm(1);
      AddDefaultPred(MIB);

      if (SrcLane != DstLane)
        MIB.addReg(SrcReg, RegState::Implicit);

      // As before, the original destination is no longer represented, add it
      // implicitly.
      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }
  }

}

//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.

/// getPartialRegUpdateClearance - Return the preferred clearance (in
/// instructions) before a partial S-register update on Swift / Cortex-A15,
/// or 0 when no dependency-breaking is wanted for this def operand.
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
                             unsigned OpNum,
                             const TargetRegisterInfo *TRI) const {
  // Only enabled for Swift and Cortex-A15, and only when the command-line
  // clearance value is non-zero.
  if (!SwiftPartialUpdateClearance ||
      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI->getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  unsigned Reg = MO.getReg();
  int UseOp = -1;

  switch(MI->getOpcode()) {
    // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
    break;

    // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    // The lane load reads the old D-reg value through operand 3.
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    // Virtual register must be a foo:ssub_0<def,undef> operand.
    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI->definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return SwiftPartialUpdateClearance;
}

// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
4115243830Sdimvoid ARMBaseInstrInfo:: 4116243830SdimbreakPartialRegDependency(MachineBasicBlock::iterator MI, 4117243830Sdim unsigned OpNum, 4118243830Sdim const TargetRegisterInfo *TRI) const { 4119243830Sdim assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); 4120243830Sdim assert(TRI && "Need TRI instance"); 4121243830Sdim 4122243830Sdim const MachineOperand &MO = MI->getOperand(OpNum); 4123243830Sdim unsigned Reg = MO.getReg(); 4124243830Sdim assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 4125243830Sdim "Can't break virtual register dependencies."); 4126243830Sdim unsigned DReg = Reg; 4127243830Sdim 4128243830Sdim // If MI defines an S-reg, find the corresponding D super-register. 4129243830Sdim if (ARM::SPRRegClass.contains(Reg)) { 4130243830Sdim DReg = ARM::D0 + (Reg - ARM::S0) / 2; 4131243830Sdim assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); 4132243830Sdim } 4133243830Sdim 4134243830Sdim assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); 4135243830Sdim assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); 4136243830Sdim 4137243830Sdim // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines 4138243830Sdim // the full D-register by loading the same value to both lanes. The 4139243830Sdim // instruction is micro-coded with 2 uops, so don't do this until we can 4140243830Sdim // properly schedule micro-coded instuctions. The dispatcher stalls cause 4141243830Sdim // too big regressions. 4142243830Sdim 4143243830Sdim // Insert the dependency-breaking FCONSTD before MI. 4144243830Sdim // 96 is the encoding of 0.5, but the actual value doesn't matter here. 
4145243830Sdim AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 4146243830Sdim get(ARM::FCONSTD), DReg).addImm(96)); 4147243830Sdim MI->addRegisterKilled(DReg, TRI, true); 4148243830Sdim} 4149243830Sdim 4150234353Sdimbool ARMBaseInstrInfo::hasNOP() const { 4151234353Sdim return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; 4152234353Sdim} 4153249423Sdim 4154249423Sdimbool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { 4155249423Sdim unsigned ShOpVal = MI->getOperand(3).getImm(); 4156249423Sdim unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); 4157249423Sdim // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 4158249423Sdim if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || 4159249423Sdim ((ShImm == 1 || ShImm == 2) && 4160249423Sdim ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) 4161249423Sdim return true; 4162249423Sdim 4163249423Sdim return false; 4164249423Sdim} 4165