1249259Sdim//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim/// \file 11249259Sdim/// \brief R600 Implementation of TargetInstrInfo. 12249259Sdim// 13249259Sdim//===----------------------------------------------------------------------===// 14249259Sdim 15249259Sdim#include "R600InstrInfo.h" 16251662Sdim#include "AMDGPU.h" 17249259Sdim#include "AMDGPUSubtarget.h" 18249259Sdim#include "AMDGPUTargetMachine.h" 19249259Sdim#include "R600Defines.h" 20249259Sdim#include "R600MachineFunctionInfo.h" 21249259Sdim#include "R600RegisterInfo.h" 22263508Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 23249259Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 24249259Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 25249259Sdim 26263508Sdim#define GET_INSTRINFO_CTOR_DTOR 27249259Sdim#include "AMDGPUGenDFAPacketizer.inc" 28249259Sdim 29249259Sdimusing namespace llvm; 30249259Sdim 31249259SdimR600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 32249259Sdim : AMDGPUInstrInfo(tm), 33263508Sdim RI(tm), 34251662Sdim ST(tm.getSubtarget<AMDGPUSubtarget>()) 35249259Sdim { } 36249259Sdim 37249259Sdimconst R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { 38249259Sdim return RI; 39249259Sdim} 40249259Sdim 41249259Sdimbool R600InstrInfo::isTrig(const MachineInstr &MI) const { 42249259Sdim return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 43249259Sdim} 44249259Sdim 45249259Sdimbool R600InstrInfo::isVector(const MachineInstr &MI) const { 46249259Sdim return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 47249259Sdim} 48249259Sdim 49249259Sdimvoid 
50249259SdimR600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 51249259Sdim MachineBasicBlock::iterator MI, DebugLoc DL, 52249259Sdim unsigned DestReg, unsigned SrcReg, 53249259Sdim bool KillSrc) const { 54263508Sdim unsigned VectorComponents = 0; 55263508Sdim if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && 56263508Sdim AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 57263508Sdim VectorComponents = 4; 58263508Sdim } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && 59263508Sdim AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { 60263508Sdim VectorComponents = 2; 61263508Sdim } 62263508Sdim 63263508Sdim if (VectorComponents > 0) { 64263508Sdim for (unsigned I = 0; I < VectorComponents; I++) { 65249259Sdim unsigned SubRegIndex = RI.getSubRegFromChannel(I); 66249259Sdim buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 67249259Sdim RI.getSubReg(DestReg, SubRegIndex), 68249259Sdim RI.getSubReg(SrcReg, SubRegIndex)) 69249259Sdim .addReg(DestReg, 70249259Sdim RegState::Define | RegState::Implicit); 71249259Sdim } 72249259Sdim } else { 73249259Sdim MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 74249259Sdim DestReg, SrcReg); 75263508Sdim NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) 76249259Sdim .setIsKill(KillSrc); 77249259Sdim } 78249259Sdim} 79249259Sdim 80263508Sdim/// \returns true if \p MBBI can be moved into a new basic. 
81263508Sdimbool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, 82263508Sdim MachineBasicBlock::iterator MBBI) const { 83263508Sdim for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), 84263508Sdim E = MBBI->operands_end(); I != E; ++I) { 85263508Sdim if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) && 86263508Sdim I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg())) 87263508Sdim return false; 88263508Sdim } 89263508Sdim return true; 90249259Sdim} 91249259Sdim 92249259Sdimunsigned R600InstrInfo::getIEQOpcode() const { 93249259Sdim return AMDGPU::SETE_INT; 94249259Sdim} 95249259Sdim 96249259Sdimbool R600InstrInfo::isMov(unsigned Opcode) const { 97249259Sdim 98249259Sdim 99249259Sdim switch(Opcode) { 100249259Sdim default: return false; 101249259Sdim case AMDGPU::MOV: 102249259Sdim case AMDGPU::MOV_IMM_F32: 103249259Sdim case AMDGPU::MOV_IMM_I32: 104249259Sdim return true; 105249259Sdim } 106249259Sdim} 107249259Sdim 108249259Sdim// Some instructions act as place holders to emulate operations that the GPU 109249259Sdim// hardware does automatically. This function can be used to check if 110249259Sdim// an opcode falls into this category. 
111249259Sdimbool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { 112249259Sdim switch (Opcode) { 113249259Sdim default: return false; 114249259Sdim case AMDGPU::RETURN: 115249259Sdim return true; 116249259Sdim } 117249259Sdim} 118249259Sdim 119249259Sdimbool R600InstrInfo::isReductionOp(unsigned Opcode) const { 120263508Sdim return false; 121249259Sdim} 122249259Sdim 123249259Sdimbool R600InstrInfo::isCubeOp(unsigned Opcode) const { 124249259Sdim switch(Opcode) { 125249259Sdim default: return false; 126249259Sdim case AMDGPU::CUBE_r600_pseudo: 127249259Sdim case AMDGPU::CUBE_r600_real: 128249259Sdim case AMDGPU::CUBE_eg_pseudo: 129249259Sdim case AMDGPU::CUBE_eg_real: 130249259Sdim return true; 131249259Sdim } 132249259Sdim} 133249259Sdim 134249259Sdimbool R600InstrInfo::isALUInstr(unsigned Opcode) const { 135249259Sdim unsigned TargetFlags = get(Opcode).TSFlags; 136249259Sdim 137263508Sdim return (TargetFlags & R600_InstFlag::ALU_INST); 138263508Sdim} 139263508Sdim 140263508Sdimbool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { 141263508Sdim unsigned TargetFlags = get(Opcode).TSFlags; 142263508Sdim 143249259Sdim return ((TargetFlags & R600_InstFlag::OP1) | 144249259Sdim (TargetFlags & R600_InstFlag::OP2) | 145249259Sdim (TargetFlags & R600_InstFlag::OP3)); 146249259Sdim} 147249259Sdim 148263508Sdimbool R600InstrInfo::isLDSInstr(unsigned Opcode) const { 149263508Sdim unsigned TargetFlags = get(Opcode).TSFlags; 150263508Sdim 151263508Sdim return ((TargetFlags & R600_InstFlag::LDS_1A) | 152263508Sdim (TargetFlags & R600_InstFlag::LDS_1A1D) | 153263508Sdim (TargetFlags & R600_InstFlag::LDS_1A2D)); 154263508Sdim} 155263508Sdim 156263508Sdimbool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const { 157263508Sdim return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1; 158263508Sdim} 159263508Sdim 160263508Sdimbool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { 161263508Sdim return isLDSInstr(Opcode) && 
getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; 162263508Sdim} 163263508Sdim 164263508Sdimbool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const { 165263508Sdim if (isALUInstr(MI->getOpcode())) 166263508Sdim return true; 167263508Sdim if (isVector(*MI) || isCubeOp(MI->getOpcode())) 168263508Sdim return true; 169263508Sdim switch (MI->getOpcode()) { 170263508Sdim case AMDGPU::PRED_X: 171263508Sdim case AMDGPU::INTERP_PAIR_XY: 172263508Sdim case AMDGPU::INTERP_PAIR_ZW: 173263508Sdim case AMDGPU::INTERP_VEC_LOAD: 174263508Sdim case AMDGPU::COPY: 175263508Sdim case AMDGPU::DOT_4: 176263508Sdim return true; 177263508Sdim default: 178263508Sdim return false; 179263508Sdim } 180263508Sdim} 181263508Sdim 182251662Sdimbool R600InstrInfo::isTransOnly(unsigned Opcode) const { 183263508Sdim if (ST.hasCaymanISA()) 184263508Sdim return false; 185263508Sdim return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); 186251662Sdim} 187251662Sdim 188251662Sdimbool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { 189251662Sdim return isTransOnly(MI->getOpcode()); 190251662Sdim} 191251662Sdim 192263508Sdimbool R600InstrInfo::isVectorOnly(unsigned Opcode) const { 193263508Sdim return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); 194263508Sdim} 195263508Sdim 196263508Sdimbool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { 197263508Sdim return isVectorOnly(MI->getOpcode()); 198263508Sdim} 199263508Sdim 200263508Sdimbool R600InstrInfo::isExport(unsigned Opcode) const { 201263508Sdim return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); 202263508Sdim} 203263508Sdim 204251662Sdimbool R600InstrInfo::usesVertexCache(unsigned Opcode) const { 205251662Sdim return ST.hasVertexCache() && IS_VTX(get(Opcode)); 206251662Sdim} 207251662Sdim 208251662Sdimbool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { 209251662Sdim const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 
210251662Sdim return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode()); 211251662Sdim} 212251662Sdim 213251662Sdimbool R600InstrInfo::usesTextureCache(unsigned Opcode) const { 214251662Sdim return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); 215251662Sdim} 216251662Sdim 217251662Sdimbool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { 218251662Sdim const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 219251662Sdim return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) || 220251662Sdim usesTextureCache(MI->getOpcode()); 221251662Sdim} 222251662Sdim 223263508Sdimbool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { 224263508Sdim switch (Opcode) { 225263508Sdim case AMDGPU::KILLGT: 226263508Sdim case AMDGPU::GROUP_BARRIER: 227263508Sdim return true; 228263508Sdim default: 229263508Sdim return false; 230263508Sdim } 231263508Sdim} 232263508Sdim 233263508Sdimbool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { 234263508Sdim return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; 235263508Sdim} 236263508Sdim 237263508Sdimbool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { 238263508Sdim return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; 239263508Sdim} 240263508Sdim 241263508Sdimbool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { 242263508Sdim if (!isALUInstr(MI->getOpcode())) { 243263508Sdim return false; 244263508Sdim } 245263508Sdim for (MachineInstr::const_mop_iterator I = MI->operands_begin(), 246263508Sdim E = MI->operands_end(); I != E; ++I) { 247263508Sdim if (!I->isReg() || !I->isUse() || 248263508Sdim TargetRegisterInfo::isVirtualRegister(I->getReg())) 249263508Sdim continue; 250263508Sdim 251263508Sdim if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) 252263508Sdim return true; 253263508Sdim } 254263508Sdim return false; 255263508Sdim} 
256263508Sdim 257263508Sdimint R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { 258263508Sdim static const unsigned OpTable[] = { 259263508Sdim AMDGPU::OpName::src0, 260263508Sdim AMDGPU::OpName::src1, 261263508Sdim AMDGPU::OpName::src2 262263508Sdim }; 263263508Sdim 264263508Sdim assert (SrcNum < 3); 265263508Sdim return getOperandIdx(Opcode, OpTable[SrcNum]); 266263508Sdim} 267263508Sdim 268263508Sdim#define SRC_SEL_ROWS 11 269263508Sdimint R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { 270263508Sdim static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = { 271263508Sdim {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 272263508Sdim {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 273263508Sdim {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 274263508Sdim {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 275263508Sdim {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 276263508Sdim {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 277263508Sdim {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 278263508Sdim {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 279263508Sdim {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 280263508Sdim {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 281263508Sdim {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} 282263508Sdim }; 283263508Sdim 284263508Sdim for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) { 285263508Sdim if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) { 286263508Sdim return getOperandIdx(Opcode, SrcSelTable[i][1]); 287263508Sdim } 288263508Sdim } 289263508Sdim return -1; 290263508Sdim} 291263508Sdim#undef SRC_SEL_ROWS 292263508Sdim 293263508SdimSmallVector<std::pair<MachineOperand *, int64_t>, 3> 294263508SdimR600InstrInfo::getSrcs(MachineInstr *MI) const { 295263508Sdim SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; 296263508Sdim 297263508Sdim if (MI->getOpcode() == AMDGPU::DOT_4) { 298263508Sdim static const unsigned 
OpTable[8][2] = { 299263508Sdim {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 300263508Sdim {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 301263508Sdim {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 302263508Sdim {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 303263508Sdim {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 304263508Sdim {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 305263508Sdim {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 306263508Sdim {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, 307263508Sdim }; 308263508Sdim 309263508Sdim for (unsigned j = 0; j < 8; j++) { 310263508Sdim MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), 311263508Sdim OpTable[j][0])); 312263508Sdim unsigned Reg = MO.getReg(); 313263508Sdim if (Reg == AMDGPU::ALU_CONST) { 314263508Sdim unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), 315263508Sdim OpTable[j][1])).getImm(); 316263508Sdim Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 317263508Sdim continue; 318263508Sdim } 319263508Sdim 320263508Sdim } 321263508Sdim return Result; 322263508Sdim } 323263508Sdim 324263508Sdim static const unsigned OpTable[3][2] = { 325263508Sdim {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 326263508Sdim {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 327263508Sdim {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 328263508Sdim }; 329263508Sdim 330263508Sdim for (unsigned j = 0; j < 3; j++) { 331263508Sdim int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); 332263508Sdim if (SrcIdx < 0) 333263508Sdim break; 334263508Sdim MachineOperand &MO = MI->getOperand(SrcIdx); 335263508Sdim unsigned Reg = MI->getOperand(SrcIdx).getReg(); 336263508Sdim if (Reg == AMDGPU::ALU_CONST) { 337263508Sdim unsigned Sel = MI->getOperand( 338263508Sdim getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); 339263508Sdim Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 340263508Sdim 
continue; 341263508Sdim } 342263508Sdim if (Reg == AMDGPU::ALU_LITERAL_X) { 343263508Sdim unsigned Imm = MI->getOperand( 344263508Sdim getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm(); 345263508Sdim Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm)); 346263508Sdim continue; 347263508Sdim } 348263508Sdim Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0)); 349263508Sdim } 350263508Sdim return Result; 351263508Sdim} 352263508Sdim 353263508Sdimstd::vector<std::pair<int, unsigned> > 354263508SdimR600InstrInfo::ExtractSrcs(MachineInstr *MI, 355263508Sdim const DenseMap<unsigned, unsigned> &PV, 356263508Sdim unsigned &ConstCount) const { 357263508Sdim ConstCount = 0; 358263508Sdim const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI); 359263508Sdim const std::pair<int, unsigned> DummyPair(-1, 0); 360263508Sdim std::vector<std::pair<int, unsigned> > Result; 361263508Sdim unsigned i = 0; 362263508Sdim for (unsigned n = Srcs.size(); i < n; ++i) { 363263508Sdim unsigned Reg = Srcs[i].first->getReg(); 364263508Sdim unsigned Index = RI.getEncodingValue(Reg) & 0xff; 365263508Sdim if (Reg == AMDGPU::OQAP) { 366263508Sdim Result.push_back(std::pair<int, unsigned>(Index, 0)); 367263508Sdim } 368263508Sdim if (PV.find(Reg) != PV.end()) { 369263508Sdim // 255 is used to tells its a PS/PV reg 370263508Sdim Result.push_back(std::pair<int, unsigned>(255, 0)); 371263508Sdim continue; 372263508Sdim } 373263508Sdim if (Index > 127) { 374263508Sdim ConstCount++; 375263508Sdim Result.push_back(DummyPair); 376263508Sdim continue; 377263508Sdim } 378263508Sdim unsigned Chan = RI.getHWRegChan(Reg); 379263508Sdim Result.push_back(std::pair<int, unsigned>(Index, Chan)); 380263508Sdim } 381263508Sdim for (; i < 3; ++i) 382263508Sdim Result.push_back(DummyPair); 383263508Sdim return Result; 384263508Sdim} 385263508Sdim 386263508Sdimstatic std::vector<std::pair<int, unsigned> > 387263508SdimSwizzle(std::vector<std::pair<int, 
unsigned> > Src, 388263508Sdim R600InstrInfo::BankSwizzle Swz) { 389263508Sdim if (Src[0] == Src[1]) 390263508Sdim Src[1].first = -1; 391263508Sdim switch (Swz) { 392263508Sdim case R600InstrInfo::ALU_VEC_012_SCL_210: 393263508Sdim break; 394263508Sdim case R600InstrInfo::ALU_VEC_021_SCL_122: 395263508Sdim std::swap(Src[1], Src[2]); 396263508Sdim break; 397263508Sdim case R600InstrInfo::ALU_VEC_102_SCL_221: 398263508Sdim std::swap(Src[0], Src[1]); 399263508Sdim break; 400263508Sdim case R600InstrInfo::ALU_VEC_120_SCL_212: 401263508Sdim std::swap(Src[0], Src[1]); 402263508Sdim std::swap(Src[0], Src[2]); 403263508Sdim break; 404263508Sdim case R600InstrInfo::ALU_VEC_201: 405263508Sdim std::swap(Src[0], Src[2]); 406263508Sdim std::swap(Src[0], Src[1]); 407263508Sdim break; 408263508Sdim case R600InstrInfo::ALU_VEC_210: 409263508Sdim std::swap(Src[0], Src[2]); 410263508Sdim break; 411263508Sdim } 412263508Sdim return Src; 413263508Sdim} 414263508Sdim 415263508Sdimstatic unsigned 416263508SdimgetTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { 417263508Sdim switch (Swz) { 418263508Sdim case R600InstrInfo::ALU_VEC_012_SCL_210: { 419263508Sdim unsigned Cycles[3] = { 2, 1, 0}; 420263508Sdim return Cycles[Op]; 421263508Sdim } 422263508Sdim case R600InstrInfo::ALU_VEC_021_SCL_122: { 423263508Sdim unsigned Cycles[3] = { 1, 2, 2}; 424263508Sdim return Cycles[Op]; 425263508Sdim } 426263508Sdim case R600InstrInfo::ALU_VEC_120_SCL_212: { 427263508Sdim unsigned Cycles[3] = { 2, 1, 2}; 428263508Sdim return Cycles[Op]; 429263508Sdim } 430263508Sdim case R600InstrInfo::ALU_VEC_102_SCL_221: { 431263508Sdim unsigned Cycles[3] = { 2, 2, 1}; 432263508Sdim return Cycles[Op]; 433263508Sdim } 434263508Sdim default: 435263508Sdim llvm_unreachable("Wrong Swizzle for Trans Slot"); 436263508Sdim return 0; 437263508Sdim } 438263508Sdim} 439263508Sdim 440263508Sdim/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed 441263508Sdim/// in the same Instruction 
Group while meeting read port limitations given a 442263508Sdim/// Swz swizzle sequence. 443263508Sdimunsigned R600InstrInfo::isLegalUpTo( 444263508Sdim const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 445263508Sdim const std::vector<R600InstrInfo::BankSwizzle> &Swz, 446263508Sdim const std::vector<std::pair<int, unsigned> > &TransSrcs, 447263508Sdim R600InstrInfo::BankSwizzle TransSwz) const { 448263508Sdim int Vector[4][3]; 449263508Sdim memset(Vector, -1, sizeof(Vector)); 450263508Sdim for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { 451263508Sdim const std::vector<std::pair<int, unsigned> > &Srcs = 452263508Sdim Swizzle(IGSrcs[i], Swz[i]); 453263508Sdim for (unsigned j = 0; j < 3; j++) { 454263508Sdim const std::pair<int, unsigned> &Src = Srcs[j]; 455263508Sdim if (Src.first < 0 || Src.first == 255) 456263508Sdim continue; 457263508Sdim if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { 458263508Sdim if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && 459263508Sdim Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { 460263508Sdim // The value from output queue A (denoted by register OQAP) can 461263508Sdim // only be fetched during the first cycle. 
462263508Sdim return false; 463263508Sdim } 464263508Sdim // OQAP does not count towards the normal read port restrictions 465263508Sdim continue; 466263508Sdim } 467263508Sdim if (Vector[Src.second][j] < 0) 468263508Sdim Vector[Src.second][j] = Src.first; 469263508Sdim if (Vector[Src.second][j] != Src.first) 470263508Sdim return i; 471263508Sdim } 472263508Sdim } 473263508Sdim // Now check Trans Alu 474263508Sdim for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { 475263508Sdim const std::pair<int, unsigned> &Src = TransSrcs[i]; 476263508Sdim unsigned Cycle = getTransSwizzle(TransSwz, i); 477263508Sdim if (Src.first < 0) 478263508Sdim continue; 479263508Sdim if (Src.first == 255) 480263508Sdim continue; 481263508Sdim if (Vector[Src.second][Cycle] < 0) 482263508Sdim Vector[Src.second][Cycle] = Src.first; 483263508Sdim if (Vector[Src.second][Cycle] != Src.first) 484263508Sdim return IGSrcs.size() - 1; 485263508Sdim } 486263508Sdim return IGSrcs.size(); 487263508Sdim} 488263508Sdim 489263508Sdim/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next 490263508Sdim/// (in lexicographic term) swizzle sequence assuming that all swizzles after 491263508Sdim/// Idx can be skipped 492263508Sdimstatic bool 493263508SdimNextPossibleSolution( 494263508Sdim std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 495263508Sdim unsigned Idx) { 496263508Sdim assert(Idx < SwzCandidate.size()); 497263508Sdim int ResetIdx = Idx; 498263508Sdim while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) 499263508Sdim ResetIdx --; 500263508Sdim for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { 501263508Sdim SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; 502263508Sdim } 503263508Sdim if (ResetIdx == -1) 504263508Sdim return false; 505263508Sdim int NextSwizzle = SwzCandidate[ResetIdx] + 1; 506263508Sdim SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; 507263508Sdim return true; 508263508Sdim} 
509263508Sdim 510263508Sdim/// Enumerate all possible Swizzle sequence to find one that can meet all 511263508Sdim/// read port requirements. 512263508Sdimbool R600InstrInfo::FindSwizzleForVectorSlot( 513263508Sdim const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 514263508Sdim std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 515263508Sdim const std::vector<std::pair<int, unsigned> > &TransSrcs, 516263508Sdim R600InstrInfo::BankSwizzle TransSwz) const { 517263508Sdim unsigned ValidUpTo = 0; 518263508Sdim do { 519263508Sdim ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); 520263508Sdim if (ValidUpTo == IGSrcs.size()) 521263508Sdim return true; 522263508Sdim } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); 523263508Sdim return false; 524263508Sdim} 525263508Sdim 526263508Sdim/// Instructions in Trans slot can't read gpr at cycle 0 if they also read 527263508Sdim/// a const, and can't read a gpr at cycle 1 if they read 2 const. 528263508Sdimstatic bool 529263508SdimisConstCompatible(R600InstrInfo::BankSwizzle TransSwz, 530263508Sdim const std::vector<std::pair<int, unsigned> > &TransOps, 531263508Sdim unsigned ConstCount) { 532263508Sdim // TransALU can't read 3 constants 533263508Sdim if (ConstCount > 2) 534263508Sdim return false; 535263508Sdim for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { 536263508Sdim const std::pair<int, unsigned> &Src = TransOps[i]; 537263508Sdim unsigned Cycle = getTransSwizzle(TransSwz, i); 538263508Sdim if (Src.first < 0) 539263508Sdim continue; 540263508Sdim if (ConstCount > 0 && Cycle == 0) 541263508Sdim return false; 542263508Sdim if (ConstCount > 1 && Cycle == 1) 543263508Sdim return false; 544263508Sdim } 545263508Sdim return true; 546263508Sdim} 547263508Sdim 548249259Sdimbool 549263508SdimR600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, 550263508Sdim const DenseMap<unsigned, unsigned> &PV, 551263508Sdim std::vector<BankSwizzle> &ValidSwizzle, 
552263508Sdim bool isLastAluTrans) 553263508Sdim const { 554263508Sdim //Todo : support shared src0 - src1 operand 555263508Sdim 556263508Sdim std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs; 557263508Sdim ValidSwizzle.clear(); 558263508Sdim unsigned ConstCount; 559263508Sdim BankSwizzle TransBS = ALU_VEC_012_SCL_210; 560263508Sdim for (unsigned i = 0, e = IG.size(); i < e; ++i) { 561263508Sdim IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); 562263508Sdim unsigned Op = getOperandIdx(IG[i]->getOpcode(), 563263508Sdim AMDGPU::OpName::bank_swizzle); 564263508Sdim ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) 565263508Sdim IG[i]->getOperand(Op).getImm()); 566263508Sdim } 567263508Sdim std::vector<std::pair<int, unsigned> > TransOps; 568263508Sdim if (!isLastAluTrans) 569263508Sdim return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); 570263508Sdim 571263508Sdim TransOps = IGSrcs.back(); 572263508Sdim IGSrcs.pop_back(); 573263508Sdim ValidSwizzle.pop_back(); 574263508Sdim 575263508Sdim static const R600InstrInfo::BankSwizzle TransSwz[] = { 576263508Sdim ALU_VEC_012_SCL_210, 577263508Sdim ALU_VEC_021_SCL_122, 578263508Sdim ALU_VEC_120_SCL_212, 579263508Sdim ALU_VEC_102_SCL_221 580263508Sdim }; 581263508Sdim for (unsigned i = 0; i < 4; i++) { 582263508Sdim TransBS = TransSwz[i]; 583263508Sdim if (!isConstCompatible(TransBS, TransOps, ConstCount)) 584263508Sdim continue; 585263508Sdim bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, 586263508Sdim TransBS); 587263508Sdim if (Result) { 588263508Sdim ValidSwizzle.push_back(TransBS); 589263508Sdim return true; 590263508Sdim } 591263508Sdim } 592263508Sdim 593263508Sdim return false; 594263508Sdim} 595263508Sdim 596263508Sdim 597263508Sdimbool 598249259SdimR600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) 599249259Sdim const { 600249259Sdim assert (Consts.size() <= 12 && "Too many operands in instructions group"); 601249259Sdim 
unsigned Pair1 = 0, Pair2 = 0; 602249259Sdim for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 603249259Sdim unsigned ReadConstHalf = Consts[i] & 2; 604249259Sdim unsigned ReadConstIndex = Consts[i] & (~3); 605249259Sdim unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; 606249259Sdim if (!Pair1) { 607249259Sdim Pair1 = ReadHalfConst; 608249259Sdim continue; 609249259Sdim } 610249259Sdim if (Pair1 == ReadHalfConst) 611249259Sdim continue; 612249259Sdim if (!Pair2) { 613249259Sdim Pair2 = ReadHalfConst; 614249259Sdim continue; 615249259Sdim } 616249259Sdim if (Pair2 != ReadHalfConst) 617249259Sdim return false; 618249259Sdim } 619249259Sdim return true; 620249259Sdim} 621249259Sdim 622249259Sdimbool 623263508SdimR600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) 624263508Sdim const { 625249259Sdim std::vector<unsigned> Consts; 626263508Sdim SmallSet<int64_t, 4> Literals; 627249259Sdim for (unsigned i = 0, n = MIs.size(); i < n; i++) { 628263508Sdim MachineInstr *MI = MIs[i]; 629249259Sdim if (!isALUInstr(MI->getOpcode())) 630249259Sdim continue; 631249259Sdim 632263508Sdim const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs = 633263508Sdim getSrcs(MI); 634263508Sdim 635263508Sdim for (unsigned j = 0, e = Srcs.size(); j < e; j++) { 636263508Sdim std::pair<MachineOperand *, unsigned> Src = Srcs[j]; 637263508Sdim if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) 638263508Sdim Literals.insert(Src.second); 639263508Sdim if (Literals.size() > 4) 640263508Sdim return false; 641263508Sdim if (Src.first->getReg() == AMDGPU::ALU_CONST) 642263508Sdim Consts.push_back(Src.second); 643263508Sdim if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || 644263508Sdim AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { 645263508Sdim unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; 646263508Sdim unsigned Chan = RI.getHWRegChan(Src.first->getReg()); 647251662Sdim Consts.push_back((Index << 2) | Chan); 
648251662Sdim } 649249259Sdim } 650249259Sdim } 651249259Sdim return fitsConstReadLimitations(Consts); 652249259Sdim} 653249259Sdim 654249259SdimDFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 655249259Sdim const ScheduleDAG *DAG) const { 656249259Sdim const InstrItineraryData *II = TM->getInstrItineraryData(); 657249259Sdim return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 658249259Sdim} 659249259Sdim 660249259Sdimstatic bool 661249259SdimisPredicateSetter(unsigned Opcode) { 662249259Sdim switch (Opcode) { 663249259Sdim case AMDGPU::PRED_X: 664249259Sdim return true; 665249259Sdim default: 666249259Sdim return false; 667249259Sdim } 668249259Sdim} 669249259Sdim 670249259Sdimstatic MachineInstr * 671249259SdimfindFirstPredicateSetterFrom(MachineBasicBlock &MBB, 672249259Sdim MachineBasicBlock::iterator I) { 673249259Sdim while (I != MBB.begin()) { 674249259Sdim --I; 675249259Sdim MachineInstr *MI = I; 676249259Sdim if (isPredicateSetter(MI->getOpcode())) 677249259Sdim return MI; 678249259Sdim } 679249259Sdim 680249259Sdim return NULL; 681249259Sdim} 682249259Sdim 683249259Sdimstatic 684249259Sdimbool isJump(unsigned Opcode) { 685249259Sdim return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; 686249259Sdim} 687249259Sdim 688263508Sdimstatic bool isBranch(unsigned Opcode) { 689263508Sdim return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || 690263508Sdim Opcode == AMDGPU::BRANCH_COND_f32; 691263508Sdim} 692263508Sdim 693249259Sdimbool 694249259SdimR600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 695249259Sdim MachineBasicBlock *&TBB, 696249259Sdim MachineBasicBlock *&FBB, 697249259Sdim SmallVectorImpl<MachineOperand> &Cond, 698249259Sdim bool AllowModify) const { 699249259Sdim // Most of the following comes from the ARM implementation of AnalyzeBranch 700249259Sdim 701249259Sdim // If the block has no terminators, it just falls into the block after it. 
702249259Sdim MachineBasicBlock::iterator I = MBB.end(); 703249259Sdim if (I == MBB.begin()) 704249259Sdim return false; 705249259Sdim --I; 706249259Sdim while (I->isDebugValue()) { 707249259Sdim if (I == MBB.begin()) 708249259Sdim return false; 709249259Sdim --I; 710249259Sdim } 711263508Sdim // AMDGPU::BRANCH* instructions are only available after isel and are not 712263508Sdim // handled 713263508Sdim if (isBranch(I->getOpcode())) 714263508Sdim return true; 715249259Sdim if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) { 716249259Sdim return false; 717249259Sdim } 718249259Sdim 719266715Sdim // Remove successive JUMP 720266715Sdim while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) { 721266715Sdim MachineBasicBlock::iterator PriorI = llvm::prior(I); 722266715Sdim if (AllowModify) 723266715Sdim I->removeFromParent(); 724266715Sdim I = PriorI; 725266715Sdim } 726249259Sdim MachineInstr *LastInst = I; 727249259Sdim 728249259Sdim // If there is only one terminator instruction, process it. 729249259Sdim unsigned LastOpc = LastInst->getOpcode(); 730249259Sdim if (I == MBB.begin() || 731249259Sdim !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) { 732249259Sdim if (LastOpc == AMDGPU::JUMP) { 733249259Sdim TBB = LastInst->getOperand(0).getMBB(); 734249259Sdim return false; 735249259Sdim } else if (LastOpc == AMDGPU::JUMP_COND) { 736249259Sdim MachineInstr *predSet = I; 737249259Sdim while (!isPredicateSetter(predSet->getOpcode())) { 738249259Sdim predSet = --I; 739249259Sdim } 740249259Sdim TBB = LastInst->getOperand(0).getMBB(); 741249259Sdim Cond.push_back(predSet->getOperand(1)); 742249259Sdim Cond.push_back(predSet->getOperand(2)); 743249259Sdim Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 744249259Sdim return false; 745249259Sdim } 746249259Sdim return true; // Can't handle indirect branch. 747249259Sdim } 748249259Sdim 749249259Sdim // Get the instruction before it if it is a terminator. 
750249259Sdim MachineInstr *SecondLastInst = I; 751249259Sdim unsigned SecondLastOpc = SecondLastInst->getOpcode(); 752249259Sdim 753249259Sdim // If the block ends with a B and a Bcc, handle it. 754249259Sdim if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { 755249259Sdim MachineInstr *predSet = --I; 756249259Sdim while (!isPredicateSetter(predSet->getOpcode())) { 757249259Sdim predSet = --I; 758249259Sdim } 759249259Sdim TBB = SecondLastInst->getOperand(0).getMBB(); 760249259Sdim FBB = LastInst->getOperand(0).getMBB(); 761249259Sdim Cond.push_back(predSet->getOperand(1)); 762249259Sdim Cond.push_back(predSet->getOperand(2)); 763249259Sdim Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 764249259Sdim return false; 765249259Sdim } 766249259Sdim 767249259Sdim // Otherwise, can't handle this. 768249259Sdim return true; 769249259Sdim} 770249259Sdim 771249259Sdimint R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 772249259Sdim const MachineInstr *MI = op.getParent(); 773249259Sdim 774249259Sdim switch (MI->getDesc().OpInfo->RegClass) { 775249259Sdim default: // FIXME: fallthrough?? 
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };   // NOTE(review): stray semicolon after the switch block -- harmless.
}

// Returns an iterator to the last CF_ALU or CF_ALU_PUSH_BEFORE clause-start
// instruction in \p MBB, or MBB.end() if the block contains none.  Used below
// to toggle a clause between its plain and push-before forms.
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return llvm::prior(It.base());
  }
  return MBB.end();
}

// TargetInstrInfo hook: insert an (un)conditional branch sequence at the end
// of \p MBB and return the number of instructions added (1 or 2).
// For conditional branches the previous predicate setter is flagged with
// MO_FLAG_PUSH and re-targeted with Cond[1]'s condition code, and the last ALU
// clause is upgraded to CF_ALU_PUSH_BEFORE so the predicate state is pushed
// before the clause executes.
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    // Single-target case: either an unconditional JUMP, or a JUMP_COND
    // predicated on PREDICATE_BIT.
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      // Cond[1] carries the condition code captured by AnalyzeBranch.
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    // Two-target case: conditional jump to TBB followed by an unconditional
    // jump to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

// TargetInstrInfo hook: erase up to two trailing branch instructions from
// \p MBB and return how many were removed.  The inverse of InsertBranch:
// removing a JUMP_COND also clears MO_FLAG_PUSH on its predicate setter and
// downgrades CF_ALU_PUSH_BEFORE back to CF_ALU.
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  // A conditional branch pair may leave a second terminator; repeat once.
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

// Returns true if \p MI's first predicate operand names one of the R600
// predicate registers (i.e. the instruction is currently predicated).
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause start in the middle of MBB then the MBB has more
    // than a single clause, unable to predicate several clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    // NOTE(review): operands 3 and 4 of CF_ALU are presumably the kcache
    // bank-select fields -- confirm against R600Instructions.td.
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    // Vector (reduction/trans-only) instructions cannot take a predicate.
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


// If-conversion profitability hooks: always claim conversion is profitable
// (single-block form).
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const{
  return true;
}

// Diamond (true/false block) form: also always profitable.
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

// Duplicating a block for if-conversion is always considered profitable.
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

// Never un-predicate already-converted blocks.
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


// Inverts the branch condition produced by AnalyzeBranch in place:
// Cond[1] is the condition opcode (zero / not-zero, int / float) and
// Cond[2] is the predicate-select register.  Returns false on success,
// true if the condition cannot be reversed.
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  // Also swap the predicate-select register between its zero and one forms.
  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

// An instruction defines the predicate iff it is one of the PRED_X setters.
bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}


// No predicate subsumes another on R600.
bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                       const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}


// Applies the predicate described by \p Pred (as built by AnalyzeBranch;
// Pred[2] holds the PRED_SEL register) to \p MI.  Returns true on success.
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // NOTE(review): operand 8 of CF_ALU is zeroed when predicating the whole
    // clause; presumably the clause's predicate/condition field -- confirm
    // against R600Instructions.td.
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    // DOT_4 carries one pred_sel operand per lane; set all four, then add the
    // implicit use of PREDICATE_BIT.
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  // Ordinary instruction: rewrite its single predicate operand.
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

// Fixed cost for evaluating a predicate: 2 cycles.
unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

// All R600 instructions are modeled with a uniform latency of 2; the
// itinerary data is unused.
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

// Marks as reserved every register that may be addressed indirectly in \p MF:
// for each indirect index, the covering 128-bit super-register plus one
// 32-bit channel register per stack-width channel.
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
    static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  // No indirect addressing used in this function.
  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      // Each 128-bit register covers four consecutive 32-bit T-registers.
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

// Indirect addresses map 1:1 to register indices while only channel 0 is
// supported.
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

// Indirectly-addressed values live in the X-channel 32-bit register class.
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

// Emits an indirect store of \p ValueReg to address register \p Address:
// a MOVA loads \p OffsetReg into AR_X (with its own write disabled), then a
// MOV with dst_rel=1 writes relative to AR_X, killing it.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

// Emits an indirect load into \p ValueReg; mirror image of buildIndirectWrite
// using src0_rel instead of dst_rel.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  // Load the offset into AR_X (write to AR_X's normal destination disabled).
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  // MOV with src0_rel=1 reads AddrReg relative to AR_X, killing AR_X.
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

// Upper bound on ALU instructions packed into a single clause.
unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

// Builds an ALU instruction with the canonical R600 operand layout and
// neutral modifier values.  The two $update_* operands are only present for
// two-source opcodes (Src1Reg != 0).  Operand positions are fixed; several
// callers index into the result by position, so the order below must not
// change.
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);      // $src0_sel
 
  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  //XXX: The r600g finalizer expects this to be 1, once we've moved the
  //scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)        // $last
      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
      .addImm(0)       // $literal
      .addImm(0);      // $bank_swizzle

  return MIB;
}

// Expands into a switch case that maps a base operand-name constant to its
// per-slot (X/Y/Z/W) variant, selected by the enclosing function's Slot.
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

// Translates a generic ALU operand name into the slot-specific operand name
// of a vector instruction (e.g. src0 -> src0_Y for Slot == 1).
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

// Expands one lane (\p Slot = 0..3 for X/Y/Z/W) of a vector DOT_4
// pseudo-instruction into a scalar DOT4 targeting \p DstReg, copying all
// per-slot modifier operands across by name.
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  // Pre-Evergreen parts use the r600 DOT4 encoding.
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  // Immediate modifier operands copied verbatim from the slot's operands.
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  // pred_sel is a register operand, so it is copied separately by name.
  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  // Operand 20 of a two-source instruction built by buildDefaultInstruction
  // is $bank_swizzle (per its operand layout); reset it to 0.
  MIB->getOperand(20).setImm(0);
  return MIB;
}

// Emits a MOV of the literal \p Imm into \p DstReg via ALU_LITERAL_X.
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                  MachineBasicBlock::iterator I,
                                  unsigned DstReg,
                                  uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                  AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

// Generic register-to-register move, as required by AMDGPUInstrInfo.
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

// Convenience overload: look up a named operand's index on an instruction.
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

// Resolves a named operand to its index via the TableGen-generated table;
// returns -1 if the opcode has no such operand.
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

// Sets the named immediate operand \p Op of \p MI to \p Imm; asserts that
// the operand exists and is an immediate.
void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

// An instruction supports packed flags iff its TSFlags encode a non-zero
// flag-operand index.
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

// Returns the operand of \p MI that holds the given \p Flag for source
// \p SrcIdx.  With Flag != 0 the instruction must use native operand
// encoding and the flag maps to a dedicated named operand; with Flag == 0
// the shared packed flag operand (from TSFlags) is returned.
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      // Masking a result is expressed by clearing the $write operand.
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex
          = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    // Packed-flag form: the flag operand index is encoded in TSFlags.
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

// Sets \p Flag on source operand \p Operand of \p MI.  Native-operand
// instructions store each flag in its own immediate operand; otherwise the
// flag is OR-ed into the shared packed flag word, shifted by the per-operand
// field width NUM_MO_FLAGS.
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      // "Not last" is represented by clearing the $last flag.
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      // Masking clears $write rather than setting a bit.
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

// Clears \p Flag on source operand \p Operand of \p MI; inverse of addFlag.
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    // Packed form: mask the flag's bit out of the shared flag word.
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}