R600InstrInfo.cpp revision 266715
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief R600 Implementation of TargetInstrInfo. 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600InstrInfo.h" 16#include "AMDGPU.h" 17#include "AMDGPUSubtarget.h" 18#include "AMDGPUTargetMachine.h" 19#include "R600Defines.h" 20#include "R600MachineFunctionInfo.h" 21#include "R600RegisterInfo.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25 26#define GET_INSTRINFO_CTOR_DTOR 27#include "AMDGPUGenDFAPacketizer.inc" 28 29using namespace llvm; 30 31R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 32 : AMDGPUInstrInfo(tm), 33 RI(tm), 34 ST(tm.getSubtarget<AMDGPUSubtarget>()) 35 { } 36 37const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { 38 return RI; 39} 40 41bool R600InstrInfo::isTrig(const MachineInstr &MI) const { 42 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 43} 44 45bool R600InstrInfo::isVector(const MachineInstr &MI) const { 46 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 47} 48 49void 50R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 51 MachineBasicBlock::iterator MI, DebugLoc DL, 52 unsigned DestReg, unsigned SrcReg, 53 bool KillSrc) const { 54 unsigned VectorComponents = 0; 55 if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && 56 AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 57 VectorComponents = 4; 58 } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && 59 AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { 60 VectorComponents = 2; 61 } 62 63 if (VectorComponents > 0) { 64 for (unsigned I = 0; I < VectorComponents; I++) { 65 unsigned SubRegIndex = RI.getSubRegFromChannel(I); 66 buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 67 RI.getSubReg(DestReg, SubRegIndex), 68 RI.getSubReg(SrcReg, SubRegIndex)) 69 .addReg(DestReg, 70 RegState::Define | RegState::Implicit); 71 } 72 } else { 73 MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 74 DestReg, SrcReg); 75 NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) 76 .setIsKill(KillSrc); 77 } 78} 79 80/// \returns true if \p MBBI can be moved into a new basic. 81bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator MBBI) const { 83 for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), 84 E = MBBI->operands_end(); I != E; ++I) { 85 if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) && 86 I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg())) 87 return false; 88 } 89 return true; 90} 91 92unsigned R600InstrInfo::getIEQOpcode() const { 93 return AMDGPU::SETE_INT; 94} 95 96bool R600InstrInfo::isMov(unsigned Opcode) const { 97 98 99 switch(Opcode) { 100 default: return false; 101 case AMDGPU::MOV: 102 case AMDGPU::MOV_IMM_F32: 103 case AMDGPU::MOV_IMM_I32: 104 return true; 105 } 106} 107 108// Some instructions act as place holders to emulate operations that the GPU 109// hardware does automatically. This function can be used to check if 110// an opcode falls into this category. 111bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { 112 switch (Opcode) { 113 default: return false; 114 case AMDGPU::RETURN: 115 return true; 116 } 117} 118 119bool R600InstrInfo::isReductionOp(unsigned Opcode) const { 120 return false; 121} 122 123bool R600InstrInfo::isCubeOp(unsigned Opcode) const { 124 switch(Opcode) { 125 default: return false; 126 case AMDGPU::CUBE_r600_pseudo: 127 case AMDGPU::CUBE_r600_real: 128 case AMDGPU::CUBE_eg_pseudo: 129 case AMDGPU::CUBE_eg_real: 130 return true; 131 } 132} 133 134bool R600InstrInfo::isALUInstr(unsigned Opcode) const { 135 unsigned TargetFlags = get(Opcode).TSFlags; 136 137 return (TargetFlags & R600_InstFlag::ALU_INST); 138} 139 140bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { 141 unsigned TargetFlags = get(Opcode).TSFlags; 142 143 return ((TargetFlags & R600_InstFlag::OP1) | 144 (TargetFlags & R600_InstFlag::OP2) | 145 (TargetFlags & R600_InstFlag::OP3)); 146} 147 148bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { 149 unsigned TargetFlags = get(Opcode).TSFlags; 150 151 return ((TargetFlags & R600_InstFlag::LDS_1A) | 152 (TargetFlags & R600_InstFlag::LDS_1A1D) | 153 (TargetFlags & R600_InstFlag::LDS_1A2D)); 154} 155 156bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const { 157 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1; 158} 159 160bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { 161 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; 162} 163 164bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const { 165 if (isALUInstr(MI->getOpcode())) 166 return true; 167 if (isVector(*MI) || isCubeOp(MI->getOpcode())) 168 return true; 169 switch (MI->getOpcode()) { 170 case AMDGPU::PRED_X: 171 case AMDGPU::INTERP_PAIR_XY: 172 case AMDGPU::INTERP_PAIR_ZW: 173 case AMDGPU::INTERP_VEC_LOAD: 174 case AMDGPU::COPY: 175 case AMDGPU::DOT_4: 176 return true; 177 default: 178 return false; 179 } 180} 181 182bool R600InstrInfo::isTransOnly(unsigned Opcode) const { 183 if (ST.hasCaymanISA()) 184 return false; 185 return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); 186} 187 188bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { 189 return isTransOnly(MI->getOpcode()); 190} 191 192bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { 193 return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); 194} 195 196bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { 197 return isVectorOnly(MI->getOpcode()); 198} 199 200bool R600InstrInfo::isExport(unsigned Opcode) const { 201 return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); 202} 203 204bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { 205 return ST.hasVertexCache() && IS_VTX(get(Opcode)); 206} 207 208bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { 209 const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 210 return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode()); 211} 212 213bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { 214 return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); 215} 216 217bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { 218 const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 219 return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) || 220 usesTextureCache(MI->getOpcode()); 221} 222 223bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { 224 switch (Opcode) { 225 case AMDGPU::KILLGT: 226 case AMDGPU::GROUP_BARRIER: 227 return true; 228 default: 229 return false; 230 } 231} 232 233bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { 234 return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; 235} 236 237bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { 238 return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; 239} 240 241bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { 242 if (!isALUInstr(MI->getOpcode())) { 243 return false; 244 } 245 for (MachineInstr::const_mop_iterator I = MI->operands_begin(), 246 E = MI->operands_end(); I != E; ++I) { 247 if (!I->isReg() || !I->isUse() || 248 TargetRegisterInfo::isVirtualRegister(I->getReg())) 249 continue; 250 251 if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) 252 return true; 253 } 254 return false; 255} 256 257int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { 258 static const unsigned OpTable[] = { 259 AMDGPU::OpName::src0, 260 AMDGPU::OpName::src1, 261 AMDGPU::OpName::src2 262 }; 263 264 assert (SrcNum < 3); 265 return getOperandIdx(Opcode, OpTable[SrcNum]); 266} 267 268#define SRC_SEL_ROWS 11 269int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { 270 static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = { 271 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 272 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 273 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 274 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 275 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 276 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 277 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 278 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 279 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 280 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 281 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} 282 }; 283 284 for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) { 285 if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) { 286 return getOperandIdx(Opcode, SrcSelTable[i][1]); 287 } 288 } 289 return -1; 290} 291#undef SRC_SEL_ROWS 292 293SmallVector<std::pair<MachineOperand *, int64_t>, 3> 294R600InstrInfo::getSrcs(MachineInstr *MI) const { 295 SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; 296 297 if (MI->getOpcode() == AMDGPU::DOT_4) { 298 static const unsigned OpTable[8][2] = { 299 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 300 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 301 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 302 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 303 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 304 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 305 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 306 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, 307 }; 308 309 for (unsigned j = 0; j < 8; j++) { 310 MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), 311 OpTable[j][0])); 312 unsigned Reg = MO.getReg(); 313 if (Reg == AMDGPU::ALU_CONST) { 314 unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), 315 OpTable[j][1])).getImm(); 316 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 317 continue; 318 } 319 320 } 321 return Result; 322 } 323 324 static const unsigned OpTable[3][2] = { 325 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 326 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 327 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 328 }; 329 330 for (unsigned j = 0; j < 3; j++) { 331 int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); 332 if (SrcIdx < 0) 333 break; 334 MachineOperand &MO = MI->getOperand(SrcIdx); 335 unsigned Reg = MI->getOperand(SrcIdx).getReg(); 336 if (Reg == AMDGPU::ALU_CONST) { 337 unsigned Sel = MI->getOperand( 338 getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); 339 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 340 continue; 341 } 342 if (Reg == AMDGPU::ALU_LITERAL_X) { 343 unsigned Imm = MI->getOperand( 344 getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm(); 345 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm)); 346 continue; 347 } 348 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0)); 349 } 350 return Result; 351} 352 353std::vector<std::pair<int, unsigned> > 354R600InstrInfo::ExtractSrcs(MachineInstr *MI, 355 const DenseMap<unsigned, unsigned> &PV, 356 unsigned &ConstCount) const { 357 ConstCount = 0; 358 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI); 359 const std::pair<int, unsigned> DummyPair(-1, 0); 360 std::vector<std::pair<int, unsigned> > Result; 361 unsigned i = 0; 362 for (unsigned n = Srcs.size(); i < n; ++i) { 363 unsigned Reg = Srcs[i].first->getReg(); 364 unsigned Index = RI.getEncodingValue(Reg) & 0xff; 365 if (Reg == AMDGPU::OQAP) { 366 Result.push_back(std::pair<int, unsigned>(Index, 0)); 367 } 368 if (PV.find(Reg) != PV.end()) { 369 // 255 is used to tells its a PS/PV reg 370 Result.push_back(std::pair<int, unsigned>(255, 0)); 371 continue; 372 } 373 if (Index > 127) { 374 ConstCount++; 375 Result.push_back(DummyPair); 376 continue; 377 } 378 unsigned Chan = RI.getHWRegChan(Reg); 379 Result.push_back(std::pair<int, unsigned>(Index, Chan)); 380 } 381 for (; i < 3; ++i) 382 Result.push_back(DummyPair); 383 return Result; 384} 385 386static std::vector<std::pair<int, unsigned> > 387Swizzle(std::vector<std::pair<int, unsigned> > Src, 388 R600InstrInfo::BankSwizzle Swz) { 389 if (Src[0] == Src[1]) 390 Src[1].first = -1; 391 switch (Swz) { 392 case R600InstrInfo::ALU_VEC_012_SCL_210: 393 break; 394 case R600InstrInfo::ALU_VEC_021_SCL_122: 395 std::swap(Src[1], Src[2]); 396 break; 397 case R600InstrInfo::ALU_VEC_102_SCL_221: 398 std::swap(Src[0], Src[1]); 399 break; 400 case R600InstrInfo::ALU_VEC_120_SCL_212: 401 std::swap(Src[0], Src[1]); 402 std::swap(Src[0], Src[2]); 403 break; 404 case R600InstrInfo::ALU_VEC_201: 405 std::swap(Src[0], Src[2]); 406 std::swap(Src[0], Src[1]); 407 break; 408 case R600InstrInfo::ALU_VEC_210: 409 std::swap(Src[0], Src[2]); 410 break; 411 } 412 return Src; 413} 414 415static unsigned 416getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { 417 switch (Swz) { 418 case R600InstrInfo::ALU_VEC_012_SCL_210: { 419 unsigned Cycles[3] = { 2, 1, 0}; 420 return Cycles[Op]; 421 } 422 case R600InstrInfo::ALU_VEC_021_SCL_122: { 423 unsigned Cycles[3] = { 1, 2, 2}; 424 return Cycles[Op]; 425 } 426 case R600InstrInfo::ALU_VEC_120_SCL_212: { 427 unsigned Cycles[3] = { 2, 1, 2}; 428 return Cycles[Op]; 429 } 430 case R600InstrInfo::ALU_VEC_102_SCL_221: { 431 unsigned Cycles[3] = { 2, 2, 1}; 432 return Cycles[Op]; 433 } 434 default: 435 llvm_unreachable("Wrong Swizzle for Trans Slot"); 436 return 0; 437 } 438} 439 440/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed 441/// in the same Instruction Group while meeting read port limitations given a 442/// Swz swizzle sequence. 443unsigned R600InstrInfo::isLegalUpTo( 444 const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 445 const std::vector<R600InstrInfo::BankSwizzle> &Swz, 446 const std::vector<std::pair<int, unsigned> > &TransSrcs, 447 R600InstrInfo::BankSwizzle TransSwz) const { 448 int Vector[4][3]; 449 memset(Vector, -1, sizeof(Vector)); 450 for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { 451 const std::vector<std::pair<int, unsigned> > &Srcs = 452 Swizzle(IGSrcs[i], Swz[i]); 453 for (unsigned j = 0; j < 3; j++) { 454 const std::pair<int, unsigned> &Src = Srcs[j]; 455 if (Src.first < 0 || Src.first == 255) 456 continue; 457 if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { 458 if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && 459 Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { 460 // The value from output queue A (denoted by register OQAP) can 461 // only be fetched during the first cycle. 462 return false; 463 } 464 // OQAP does not count towards the normal read port restrictions 465 continue; 466 } 467 if (Vector[Src.second][j] < 0) 468 Vector[Src.second][j] = Src.first; 469 if (Vector[Src.second][j] != Src.first) 470 return i; 471 } 472 } 473 // Now check Trans Alu 474 for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { 475 const std::pair<int, unsigned> &Src = TransSrcs[i]; 476 unsigned Cycle = getTransSwizzle(TransSwz, i); 477 if (Src.first < 0) 478 continue; 479 if (Src.first == 255) 480 continue; 481 if (Vector[Src.second][Cycle] < 0) 482 Vector[Src.second][Cycle] = Src.first; 483 if (Vector[Src.second][Cycle] != Src.first) 484 return IGSrcs.size() - 1; 485 } 486 return IGSrcs.size(); 487} 488 489/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next 490/// (in lexicographic term) swizzle sequence assuming that all swizzles after 491/// Idx can be skipped 492static bool 493NextPossibleSolution( 494 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 495 unsigned Idx) { 496 assert(Idx < SwzCandidate.size()); 497 int ResetIdx = Idx; 498 while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) 499 ResetIdx --; 500 for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { 501 SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; 502 } 503 if (ResetIdx == -1) 504 return false; 505 int NextSwizzle = SwzCandidate[ResetIdx] + 1; 506 SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; 507 return true; 508} 509 510/// Enumerate all possible Swizzle sequence to find one that can meet all 511/// read port requirements. 512bool R600InstrInfo::FindSwizzleForVectorSlot( 513 const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 514 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 515 const std::vector<std::pair<int, unsigned> > &TransSrcs, 516 R600InstrInfo::BankSwizzle TransSwz) const { 517 unsigned ValidUpTo = 0; 518 do { 519 ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); 520 if (ValidUpTo == IGSrcs.size()) 521 return true; 522 } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); 523 return false; 524} 525 526/// Instructions in Trans slot can't read gpr at cycle 0 if they also read 527/// a const, and can't read a gpr at cycle 1 if they read 2 const. 528static bool 529isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, 530 const std::vector<std::pair<int, unsigned> > &TransOps, 531 unsigned ConstCount) { 532 // TransALU can't read 3 constants 533 if (ConstCount > 2) 534 return false; 535 for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { 536 const std::pair<int, unsigned> &Src = TransOps[i]; 537 unsigned Cycle = getTransSwizzle(TransSwz, i); 538 if (Src.first < 0) 539 continue; 540 if (ConstCount > 0 && Cycle == 0) 541 return false; 542 if (ConstCount > 1 && Cycle == 1) 543 return false; 544 } 545 return true; 546} 547 548bool 549R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, 550 const DenseMap<unsigned, unsigned> &PV, 551 std::vector<BankSwizzle> &ValidSwizzle, 552 bool isLastAluTrans) 553 const { 554 //Todo : support shared src0 - src1 operand 555 556 std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs; 557 ValidSwizzle.clear(); 558 unsigned ConstCount; 559 BankSwizzle TransBS = ALU_VEC_012_SCL_210; 560 for (unsigned i = 0, e = IG.size(); i < e; ++i) { 561 IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); 562 unsigned Op = getOperandIdx(IG[i]->getOpcode(), 563 AMDGPU::OpName::bank_swizzle); 564 ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) 565 IG[i]->getOperand(Op).getImm()); 566 } 567 std::vector<std::pair<int, unsigned> > TransOps; 568 if (!isLastAluTrans) 569 return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); 570 571 TransOps = IGSrcs.back(); 572 IGSrcs.pop_back(); 573 ValidSwizzle.pop_back(); 574 575 static const R600InstrInfo::BankSwizzle TransSwz[] = { 576 ALU_VEC_012_SCL_210, 577 ALU_VEC_021_SCL_122, 578 ALU_VEC_120_SCL_212, 579 ALU_VEC_102_SCL_221 580 }; 581 for (unsigned i = 0; i < 4; i++) { 582 TransBS = TransSwz[i]; 583 if (!isConstCompatible(TransBS, TransOps, ConstCount)) 584 continue; 585 bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, 586 TransBS); 587 if (Result) { 588 ValidSwizzle.push_back(TransBS); 589 return true; 590 } 591 } 592 593 return false; 594} 595 596 597bool 598R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) 599 const { 600 assert (Consts.size() <= 12 && "Too many operands in instructions group"); 601 unsigned Pair1 = 0, Pair2 = 0; 602 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 603 unsigned ReadConstHalf = Consts[i] & 2; 604 unsigned ReadConstIndex = Consts[i] & (~3); 605 unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; 606 if (!Pair1) { 607 Pair1 = ReadHalfConst; 608 continue; 609 } 610 if (Pair1 == ReadHalfConst) 611 continue; 612 if (!Pair2) { 613 Pair2 = ReadHalfConst; 614 continue; 615 } 616 if (Pair2 != ReadHalfConst) 617 return false; 618 } 619 return true; 620} 621 622bool 623R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) 624 const { 625 std::vector<unsigned> Consts; 626 SmallSet<int64_t, 4> Literals; 627 for (unsigned i = 0, n = MIs.size(); i < n; i++) { 628 MachineInstr *MI = MIs[i]; 629 if (!isALUInstr(MI->getOpcode())) 630 continue; 631 632 const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs = 633 getSrcs(MI); 634 635 for (unsigned j = 0, e = Srcs.size(); j < e; j++) { 636 std::pair<MachineOperand *, unsigned> Src = Srcs[j]; 637 if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) 638 Literals.insert(Src.second); 639 if (Literals.size() > 4) 640 return false; 641 if (Src.first->getReg() == AMDGPU::ALU_CONST) 642 Consts.push_back(Src.second); 643 if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || 644 AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { 645 unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; 646 unsigned Chan = RI.getHWRegChan(Src.first->getReg()); 647 Consts.push_back((Index << 2) | Chan); 648 } 649 } 650 } 651 return fitsConstReadLimitations(Consts); 652} 653 654DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 655 const ScheduleDAG *DAG) const { 656 const InstrItineraryData *II = TM->getInstrItineraryData(); 657 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 658} 659 660static bool 661isPredicateSetter(unsigned Opcode) { 662 switch (Opcode) { 663 case AMDGPU::PRED_X: 664 return true; 665 default: 666 return false; 667 } 668} 669 670static MachineInstr * 671findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 672 MachineBasicBlock::iterator I) { 673 while (I != MBB.begin()) { 674 --I; 675 MachineInstr *MI = I; 676 if (isPredicateSetter(MI->getOpcode())) 677 return MI; 678 } 679 680 return NULL; 681} 682 683static 684bool isJump(unsigned Opcode) { 685 return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; 686} 687 688static bool isBranch(unsigned Opcode) { 689 return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || 690 Opcode == AMDGPU::BRANCH_COND_f32; 691} 692 693bool 694R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 695 MachineBasicBlock *&TBB, 696 MachineBasicBlock *&FBB, 697 SmallVectorImpl<MachineOperand> &Cond, 698 bool AllowModify) const { 699 // Most of the following comes from the ARM implementation of AnalyzeBranch 700 701 // If the block has no terminators, it just falls into the block after it. 702 MachineBasicBlock::iterator I = MBB.end(); 703 if (I == MBB.begin()) 704 return false; 705 --I; 706 while (I->isDebugValue()) { 707 if (I == MBB.begin()) 708 return false; 709 --I; 710 } 711 // AMDGPU::BRANCH* instructions are only available after isel and are not 712 // handled 713 if (isBranch(I->getOpcode())) 714 return true; 715 if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) { 716 return false; 717 } 718 719 // Remove successive JUMP 720 while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) { 721 MachineBasicBlock::iterator PriorI = llvm::prior(I); 722 if (AllowModify) 723 I->removeFromParent(); 724 I = PriorI; 725 } 726 MachineInstr *LastInst = I; 727 728 // If there is only one terminator instruction, process it. 729 unsigned LastOpc = LastInst->getOpcode(); 730 if (I == MBB.begin() || 731 !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) { 732 if (LastOpc == AMDGPU::JUMP) { 733 TBB = LastInst->getOperand(0).getMBB(); 734 return false; 735 } else if (LastOpc == AMDGPU::JUMP_COND) { 736 MachineInstr *predSet = I; 737 while (!isPredicateSetter(predSet->getOpcode())) { 738 predSet = --I; 739 } 740 TBB = LastInst->getOperand(0).getMBB(); 741 Cond.push_back(predSet->getOperand(1)); 742 Cond.push_back(predSet->getOperand(2)); 743 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 744 return false; 745 } 746 return true; // Can't handle indirect branch. 747 } 748 749 // Get the instruction before it if it is a terminator. 750 MachineInstr *SecondLastInst = I; 751 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 752 753 // If the block ends with a B and a Bcc, handle it. 754 if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { 755 MachineInstr *predSet = --I; 756 while (!isPredicateSetter(predSet->getOpcode())) { 757 predSet = --I; 758 } 759 TBB = SecondLastInst->getOperand(0).getMBB(); 760 FBB = LastInst->getOperand(0).getMBB(); 761 Cond.push_back(predSet->getOperand(1)); 762 Cond.push_back(predSet->getOperand(2)); 763 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 764 return false; 765 } 766 767 // Otherwise, can't handle this. 768 return true; 769} 770 771int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 772 const MachineInstr *MI = op.getParent(); 773 774 switch (MI->getDesc().OpInfo->RegClass) { 775 default: // FIXME: fallthrough?? 776 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; 777 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; 778 }; 779} 780 781static 782MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { 783 for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); 784 It != E; ++It) { 785 if (It->getOpcode() == AMDGPU::CF_ALU || 786 It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 787 return llvm::prior(It.base()); 788 } 789 return MBB.end(); 790} 791 792unsigned 793R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, 794 MachineBasicBlock *TBB, 795 MachineBasicBlock *FBB, 796 const SmallVectorImpl<MachineOperand> &Cond, 797 DebugLoc DL) const { 798 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 799 800 if (FBB == 0) { 801 if (Cond.empty()) { 802 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); 803 return 1; 804 } else { 805 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 806 assert(PredSet && "No previous predicate !"); 807 addFlag(PredSet, 0, MO_FLAG_PUSH); 808 PredSet->getOperand(2).setImm(Cond[1].getImm()); 809 810 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 811 .addMBB(TBB) 812 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 813 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 814 if (CfAlu == MBB.end()) 815 return 1; 816 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 817 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 818 return 1; 819 } 820 } else { 821 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 822 assert(PredSet && "No previous predicate !"); 823 addFlag(PredSet, 0, MO_FLAG_PUSH); 824 PredSet->getOperand(2).setImm(Cond[1].getImm()); 825 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 826 .addMBB(TBB) 827 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 828 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); 829 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 830 if (CfAlu == MBB.end()) 831 return 2; 832 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 833 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 834 return 2; 835 } 836} 837 838unsigned 839R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 840 841 // Note : we leave PRED* instructions there. 842 // They may be needed when predicating instructions. 843 844 MachineBasicBlock::iterator I = MBB.end(); 845 846 if (I == MBB.begin()) { 847 return 0; 848 } 849 --I; 850 switch (I->getOpcode()) { 851 default: 852 return 0; 853 case AMDGPU::JUMP_COND: { 854 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 855 clearFlag(predSet, 0, MO_FLAG_PUSH); 856 I->eraseFromParent(); 857 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 858 if (CfAlu == MBB.end()) 859 break; 860 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 861 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 862 break; 863 } 864 case AMDGPU::JUMP: 865 I->eraseFromParent(); 866 break; 867 } 868 I = MBB.end(); 869 870 if (I == MBB.begin()) { 871 return 1; 872 } 873 --I; 874 switch (I->getOpcode()) { 875 // FIXME: only one case?? 876 default: 877 return 1; 878 case AMDGPU::JUMP_COND: { 879 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 880 clearFlag(predSet, 0, MO_FLAG_PUSH); 881 I->eraseFromParent(); 882 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 883 if (CfAlu == MBB.end()) 884 break; 885 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 886 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 887 break; 888 } 889 case AMDGPU::JUMP: 890 I->eraseFromParent(); 891 break; 892 } 893 return 2; 894} 895 896bool 897R600InstrInfo::isPredicated(const MachineInstr *MI) const { 898 int idx = MI->findFirstPredOperandIdx(); 899 if (idx < 0) 900 return false; 901 902 unsigned Reg = MI->getOperand(idx).getReg(); 903 switch (Reg) { 904 default: return false; 905 case AMDGPU::PRED_SEL_ONE: 906 case AMDGPU::PRED_SEL_ZERO: 907 case AMDGPU::PREDICATE_BIT: 908 return true; 909 } 910} 911 912bool 913R600InstrInfo::isPredicable(MachineInstr *MI) const { 914 // XXX: KILL* instructions can be predicated, but they must be the last 915 // instruction in a clause, so this means any instructions after them cannot 916 // be predicated. Until we have proper support for instruction clauses in the 917 // backend, we will mark KILL* instructions as unpredicable. 918 919 if (MI->getOpcode() == AMDGPU::KILLGT) { 920 return false; 921 } else if (MI->getOpcode() == AMDGPU::CF_ALU) { 922 // If the clause start in the middle of MBB then the MBB has more 923 // than a single clause, unable to predicate several clauses. 924 if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI)) 925 return false; 926 // TODO: We don't support KC merging atm 927 if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0) 928 return false; 929 return true; 930 } else if (isVector(*MI)) { 931 return false; 932 } else { 933 return AMDGPUInstrInfo::isPredicable(MI); 934 } 935} 936 937 938bool 939R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, 940 unsigned NumCyles, 941 unsigned ExtraPredCycles, 942 const BranchProbability &Probability) const{ 943 return true; 944} 945 946bool 947R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, 948 unsigned NumTCycles, 949 unsigned ExtraTCycles, 950 MachineBasicBlock &FMBB, 951 unsigned NumFCycles, 952 unsigned ExtraFCycles, 953 const BranchProbability &Probability) const { 954 return true; 955} 956 957bool 958R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, 959 unsigned NumCyles, 960 const BranchProbability &Probability) 961 const { 962 return true; 963} 964 965bool 966R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 967 MachineBasicBlock &FMBB) const { 968 return false; 969} 970 971 972bool 973R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 974 MachineOperand &MO = Cond[1]; 975 switch (MO.getImm()) { 976 case OPCODE_IS_ZERO_INT: 977 MO.setImm(OPCODE_IS_NOT_ZERO_INT); 978 break; 979 case OPCODE_IS_NOT_ZERO_INT: 980 MO.setImm(OPCODE_IS_ZERO_INT); 981 break; 982 case OPCODE_IS_ZERO: 983 MO.setImm(OPCODE_IS_NOT_ZERO); 984 break; 985 case OPCODE_IS_NOT_ZERO: 986 MO.setImm(OPCODE_IS_ZERO); 987 break; 988 default: 989 return true; 990 } 991 992 MachineOperand &MO2 = Cond[2]; 993 switch (MO2.getReg()) { 994 case AMDGPU::PRED_SEL_ZERO: 995 MO2.setReg(AMDGPU::PRED_SEL_ONE); 996 break; 997 case AMDGPU::PRED_SEL_ONE: 998 MO2.setReg(AMDGPU::PRED_SEL_ZERO); 999 break; 1000 default: 1001 return true; 1002 } 1003 return false; 1004} 1005 1006bool 1007R600InstrInfo::DefinesPredicate(MachineInstr *MI, 1008 std::vector<MachineOperand> &Pred) const { 1009 return isPredicateSetter(MI->getOpcode()); 1010} 1011 1012 1013bool 1014R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 1015 const SmallVectorImpl<MachineOperand> &Pred2) const { 1016 return false; 1017} 1018 1019 1020bool 1021R600InstrInfo::PredicateInstruction(MachineInstr *MI, 1022 const SmallVectorImpl<MachineOperand> &Pred) const { 1023 int PIdx = MI->findFirstPredOperandIdx(); 1024 1025 if (MI->getOpcode() == AMDGPU::CF_ALU) { 1026 MI->getOperand(8).setImm(0); 1027 return true; 1028 } 1029 1030 if (MI->getOpcode() == AMDGPU::DOT_4) { 1031 MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X)) 1032 .setReg(Pred[2].getReg()); 1033 MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y)) 1034 .setReg(Pred[2].getReg()); 1035 MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z)) 1036 .setReg(Pred[2].getReg()); 1037 MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W)) 1038 .setReg(Pred[2].getReg()); 1039 MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 1040 MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 1041 return true; 1042 } 1043 1044 if (PIdx != -1) { 1045 MachineOperand &PMO = MI->getOperand(PIdx); 1046 PMO.setReg(Pred[2].getReg()); 1047 MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 1048 MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 1049 return true; 1050 } 1051 1052 return false; 1053} 1054 1055unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const { 1056 return 2; 1057} 1058 1059unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 1060 const MachineInstr *MI, 1061 unsigned *PredCost) const { 1062 if (PredCost) 1063 *PredCost = 2; 1064 return 2; 1065} 1066 1067void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, 1068 const MachineFunction &MF) const { 1069 const AMDGPUFrameLowering *TFL = 1070 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); 1071 1072 unsigned StackWidth = TFL->getStackWidth(MF); 1073 int End = getIndirectIndexEnd(MF); 1074 1075 if (End == -1) 1076 return; 1077 1078 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { 1079 unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); 1080 Reserved.set(SuperReg); 1081 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { 1082 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); 1083 Reserved.set(Reg); 1084 } 1085 } 1086} 1087 1088unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, 1089 unsigned Channel) const { 1090 // XXX: Remove when we support a stack width > 2 1091 assert(Channel == 0); 1092 return RegIndex; 1093} 1094 1095const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { 1096 return &AMDGPU::R600_TReg32_XRegClass; 1097} 1098 1099MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, 1100 MachineBasicBlock::iterator I, 1101 unsigned ValueReg, unsigned Address, 1102 unsigned OffsetReg) const { 1103 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); 1104 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, 1105 AMDGPU::AR_X, OffsetReg); 1106 setImmOperand(MOVA, AMDGPU::OpName::write, 0); 1107 1108 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, 1109 AddrReg, ValueReg) 1110 .addReg(AMDGPU::AR_X, 1111 RegState::Implicit | RegState::Kill); 1112 setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1); 1113 return Mov; 1114} 1115 1116MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, 1117 MachineBasicBlock::iterator I, 1118 unsigned ValueReg, unsigned Address, 1119 unsigned OffsetReg) const { 1120 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); 1121 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, 1122 AMDGPU::AR_X, 1123 OffsetReg); 1124 setImmOperand(MOVA, AMDGPU::OpName::write, 0); 1125 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, 1126 ValueReg, 1127 AddrReg) 1128 .addReg(AMDGPU::AR_X, 1129 RegState::Implicit | RegState::Kill); 1130 setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1); 1131 1132 return Mov; 1133} 1134 1135unsigned R600InstrInfo::getMaxAlusPerClause() const { 1136 return 115; 1137} 1138 1139MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, 1140 MachineBasicBlock::iterator I, 1141 unsigned Opcode, 1142 unsigned DstReg, 1143 unsigned Src0Reg, 1144 unsigned Src1Reg) const { 1145 MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode), 1146 DstReg); // $dst 1147 1148 if (Src1Reg) { 1149 MIB.addImm(0) // $update_exec_mask 1150 .addImm(0); // $update_predicate 1151 } 1152 MIB.addImm(1) // $write 1153 .addImm(0) // $omod 1154 .addImm(0) // $dst_rel 1155 .addImm(0) // $dst_clamp 1156 .addReg(Src0Reg) // $src0 1157 .addImm(0) // $src0_neg 1158 .addImm(0) // $src0_rel 1159 .addImm(0) // $src0_abs 1160 .addImm(-1); // $src0_sel 1161 1162 if (Src1Reg) { 1163 MIB.addReg(Src1Reg) // $src1 1164 .addImm(0) // $src1_neg 1165 .addImm(0) // $src1_rel 1166 .addImm(0) // $src1_abs 1167 .addImm(-1); // $src1_sel 1168 } 1169 1170 //XXX: The r600g finalizer expects this to be 1, once we've moved the 1171 //scheduling to the backend, we can change the default to 0. 1172 MIB.addImm(1) // $last 1173 .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel 1174 .addImm(0) // $literal 1175 .addImm(0); // $bank_swizzle 1176 1177 return MIB; 1178} 1179 1180#define OPERAND_CASE(Label) \ 1181 case Label: { \ 1182 static const unsigned Ops[] = \ 1183 { \ 1184 Label##_X, \ 1185 Label##_Y, \ 1186 Label##_Z, \ 1187 Label##_W \ 1188 }; \ 1189 return Ops[Slot]; \ 1190 } 1191 1192static unsigned getSlotedOps(unsigned Op, unsigned Slot) { 1193 switch (Op) { 1194 OPERAND_CASE(AMDGPU::OpName::update_exec_mask) 1195 OPERAND_CASE(AMDGPU::OpName::update_pred) 1196 OPERAND_CASE(AMDGPU::OpName::write) 1197 OPERAND_CASE(AMDGPU::OpName::omod) 1198 OPERAND_CASE(AMDGPU::OpName::dst_rel) 1199 OPERAND_CASE(AMDGPU::OpName::clamp) 1200 OPERAND_CASE(AMDGPU::OpName::src0) 1201 OPERAND_CASE(AMDGPU::OpName::src0_neg) 1202 OPERAND_CASE(AMDGPU::OpName::src0_rel) 1203 OPERAND_CASE(AMDGPU::OpName::src0_abs) 1204 OPERAND_CASE(AMDGPU::OpName::src0_sel) 1205 OPERAND_CASE(AMDGPU::OpName::src1) 1206 OPERAND_CASE(AMDGPU::OpName::src1_neg) 1207 OPERAND_CASE(AMDGPU::OpName::src1_rel) 1208 OPERAND_CASE(AMDGPU::OpName::src1_abs) 1209 OPERAND_CASE(AMDGPU::OpName::src1_sel) 1210 OPERAND_CASE(AMDGPU::OpName::pred_sel) 1211 default: 1212 llvm_unreachable("Wrong Operand"); 1213 } 1214} 1215 1216#undef OPERAND_CASE 1217 1218MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( 1219 MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) 1220 const { 1221 assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); 1222 unsigned Opcode; 1223 const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); 1224 if (ST.getGeneration() <= AMDGPUSubtarget::R700) 1225 Opcode = AMDGPU::DOT4_r600; 1226 else 1227 Opcode = AMDGPU::DOT4_eg; 1228 MachineBasicBlock::iterator I = MI; 1229 MachineOperand &Src0 = MI->getOperand( 1230 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); 1231 MachineOperand &Src1 = MI->getOperand( 1232 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); 1233 MachineInstr *MIB = buildDefaultInstruction( 1234 MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); 1235 static const unsigned Operands[14] = { 1236 AMDGPU::OpName::update_exec_mask, 1237 AMDGPU::OpName::update_pred, 1238 AMDGPU::OpName::write, 1239 AMDGPU::OpName::omod, 1240 AMDGPU::OpName::dst_rel, 1241 AMDGPU::OpName::clamp, 1242 AMDGPU::OpName::src0_neg, 1243 AMDGPU::OpName::src0_rel, 1244 AMDGPU::OpName::src0_abs, 1245 AMDGPU::OpName::src0_sel, 1246 AMDGPU::OpName::src1_neg, 1247 AMDGPU::OpName::src1_rel, 1248 AMDGPU::OpName::src1_abs, 1249 AMDGPU::OpName::src1_sel, 1250 }; 1251 1252 MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), 1253 getSlotedOps(AMDGPU::OpName::pred_sel, Slot))); 1254 MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) 1255 .setReg(MO.getReg()); 1256 1257 for (unsigned i = 0; i < 14; i++) { 1258 MachineOperand &MO = MI->getOperand( 1259 getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); 1260 assert (MO.isImm()); 1261 setImmOperand(MIB, Operands[i], MO.getImm()); 1262 } 1263 MIB->getOperand(20).setImm(0); 1264 return MIB; 1265} 1266 1267MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, 1268 MachineBasicBlock::iterator I, 1269 unsigned DstReg, 1270 uint64_t Imm) const { 1271 MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, 1272 AMDGPU::ALU_LITERAL_X); 1273 setImmOperand(MovImm, AMDGPU::OpName::literal, Imm); 1274 return MovImm; 1275} 1276 1277MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, 1278 MachineBasicBlock::iterator I, 1279 unsigned DstReg, unsigned SrcReg) const { 1280 return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); 1281} 1282 1283int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { 1284 return getOperandIdx(MI.getOpcode(), Op); 1285} 1286 1287int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { 1288 return AMDGPU::getNamedOperandIdx(Opcode, Op); 1289} 1290 1291void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op, 1292 int64_t Imm) const { 1293 int Idx = getOperandIdx(*MI, Op); 1294 assert(Idx != -1 && "Operand not supported for this instruction."); 1295 assert(MI->getOperand(Idx).isImm()); 1296 MI->getOperand(Idx).setImm(Imm); 1297} 1298 1299//===----------------------------------------------------------------------===// 1300// Instruction flag getters/setters 1301//===----------------------------------------------------------------------===// 1302 1303bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const { 1304 return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; 1305} 1306 1307MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, 1308 unsigned Flag) const { 1309 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1310 int FlagIndex = 0; 1311 if (Flag != 0) { 1312 // If we pass something other than the default value of Flag to this 1313 // function, it means we are want to set a flag on an instruction 1314 // that uses native encoding. 1315 assert(HAS_NATIVE_OPERANDS(TargetFlags)); 1316 bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; 1317 switch (Flag) { 1318 case MO_FLAG_CLAMP: 1319 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp); 1320 break; 1321 case MO_FLAG_MASK: 1322 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write); 1323 break; 1324 case MO_FLAG_NOT_LAST: 1325 case MO_FLAG_LAST: 1326 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last); 1327 break; 1328 case MO_FLAG_NEG: 1329 switch (SrcIdx) { 1330 case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break; 1331 case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break; 1332 case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break; 1333 } 1334 break; 1335 1336 case MO_FLAG_ABS: 1337 assert(!IsOP3 && "Cannot set absolute value modifier for OP3 " 1338 "instructions."); 1339 (void)IsOP3; 1340 switch (SrcIdx) { 1341 case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break; 1342 case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break; 1343 } 1344 break; 1345 1346 default: 1347 FlagIndex = -1; 1348 break; 1349 } 1350 assert(FlagIndex != -1 && "Flag not supported for this instruction"); 1351 } else { 1352 FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags); 1353 assert(FlagIndex != 0 && 1354 "Instruction flags not supported for this instruction"); 1355 } 1356 1357 MachineOperand &FlagOp = MI->getOperand(FlagIndex); 1358 assert(FlagOp.isImm()); 1359 return FlagOp; 1360} 1361 1362void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, 1363 unsigned Flag) const { 1364 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1365 if (Flag == 0) { 1366 return; 1367 } 1368 if (HAS_NATIVE_OPERANDS(TargetFlags)) { 1369 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); 1370 if (Flag == MO_FLAG_NOT_LAST) { 1371 clearFlag(MI, Operand, MO_FLAG_LAST); 1372 } else if (Flag == MO_FLAG_MASK) { 1373 clearFlag(MI, Operand, Flag); 1374 } else { 1375 FlagOp.setImm(1); 1376 } 1377 } else { 1378 MachineOperand &FlagOp = getFlagOp(MI, Operand); 1379 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); 1380 } 1381} 1382 1383void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, 1384 unsigned Flag) const { 1385 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1386 if (HAS_NATIVE_OPERANDS(TargetFlags)) { 1387 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); 1388 FlagOp.setImm(0); 1389 } else { 1390 MachineOperand &FlagOp = getFlagOp(MI); 1391 unsigned InstFlags = FlagOp.getImm(); 1392 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); 1393 FlagOp.setImm(InstFlags); 1394 } 1395} 1396