// X86InstructionSelector.cpp revision 360784
1//===- X86InstructionSelector.cpp -----------------------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// \file 9/// This file implements the targeting of the InstructionSelector class for 10/// X86. 11/// \todo This should be generated by TableGen. 12//===----------------------------------------------------------------------===// 13 14#include "MCTargetDesc/X86BaseInfo.h" 15#include "X86InstrBuilder.h" 16#include "X86InstrInfo.h" 17#include "X86RegisterBankInfo.h" 18#include "X86RegisterInfo.h" 19#include "X86Subtarget.h" 20#include "X86TargetMachine.h" 21#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 22#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 23#include "llvm/CodeGen/GlobalISel/RegisterBank.h" 24#include "llvm/CodeGen/GlobalISel/Utils.h" 25#include "llvm/CodeGen/MachineBasicBlock.h" 26#include "llvm/CodeGen/MachineConstantPool.h" 27#include "llvm/CodeGen/MachineFunction.h" 28#include "llvm/CodeGen/MachineInstr.h" 29#include "llvm/CodeGen/MachineInstrBuilder.h" 30#include "llvm/CodeGen/MachineMemOperand.h" 31#include "llvm/CodeGen/MachineOperand.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/CodeGen/TargetOpcodes.h" 34#include "llvm/CodeGen/TargetRegisterInfo.h" 35#include "llvm/IR/DataLayout.h" 36#include "llvm/IR/InstrTypes.h" 37#include "llvm/IR/IntrinsicsX86.h" 38#include "llvm/Support/AtomicOrdering.h" 39#include "llvm/Support/CodeGen.h" 40#include "llvm/Support/Debug.h" 41#include "llvm/Support/ErrorHandling.h" 42#include "llvm/Support/LowLevelTypeImpl.h" 43#include "llvm/Support/MathExtras.h" 44#include "llvm/Support/raw_ostream.h" 45#include <cassert> 46#include <cstdint> 47#include <tuple> 48 49#define DEBUG_TYPE "X86-isel" 50 51using 
namespace llvm; 52 53namespace { 54 55#define GET_GLOBALISEL_PREDICATE_BITSET 56#include "X86GenGlobalISel.inc" 57#undef GET_GLOBALISEL_PREDICATE_BITSET 58 59class X86InstructionSelector : public InstructionSelector { 60public: 61 X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, 62 const X86RegisterBankInfo &RBI); 63 64 bool select(MachineInstr &I) override; 65 static const char *getName() { return DEBUG_TYPE; } 66 67private: 68 /// tblgen-erated 'select' implementation, used as the initial selector for 69 /// the patterns that don't require complex C++. 70 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 71 72 // TODO: remove after supported by Tablegen-erated instruction selection. 73 unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc, 74 uint64_t Alignment) const; 75 76 bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, 77 MachineFunction &MF) const; 78 bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, 79 MachineFunction &MF) const; 80 bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, 81 MachineFunction &MF) const; 82 bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, 83 MachineFunction &MF) const; 84 bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI, 85 MachineFunction &MF) const; 86 bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI, 87 MachineFunction &MF) const; 88 bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI, 89 MachineFunction &MF) const; 90 bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, 91 MachineFunction &MF) const; 92 bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI, 93 MachineFunction &MF) const; 94 bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, 95 MachineFunction &MF) const; 96 bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; 97 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 98 MachineFunction &MF); 99 bool 
selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 100 MachineFunction &MF); 101 bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, 102 MachineFunction &MF) const; 103 bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, 104 MachineFunction &MF) const; 105 bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI, 106 MachineFunction &MF) const; 107 bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI, 108 const unsigned DstReg, 109 const TargetRegisterClass *DstRC, 110 const unsigned SrcReg, 111 const TargetRegisterClass *SrcRC) const; 112 bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI, 113 MachineFunction &MF) const; 114 bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const; 115 bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI, 116 MachineFunction &MF) const; 117 bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI, 118 MachineFunction &MF) const; 119 120 // emit insert subreg instruction and insert it before MachineInstr &I 121 bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 122 MachineRegisterInfo &MRI, MachineFunction &MF) const; 123 // emit extract subreg instruction and insert it before MachineInstr &I 124 bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 125 MachineRegisterInfo &MRI, MachineFunction &MF) const; 126 127 const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const; 128 const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg, 129 MachineRegisterInfo &MRI) const; 130 131 const X86TargetMachine &TM; 132 const X86Subtarget &STI; 133 const X86InstrInfo &TII; 134 const X86RegisterInfo &TRI; 135 const X86RegisterBankInfo &RBI; 136 137#define GET_GLOBALISEL_PREDICATES_DECL 138#include "X86GenGlobalISel.inc" 139#undef GET_GLOBALISEL_PREDICATES_DECL 140 141#define GET_GLOBALISEL_TEMPORARIES_DECL 142#include "X86GenGlobalISel.inc" 143#undef 
GET_GLOBALISEL_TEMPORARIES_DECL 144}; 145 146} // end anonymous namespace 147 148#define GET_GLOBALISEL_IMPL 149#include "X86GenGlobalISel.inc" 150#undef GET_GLOBALISEL_IMPL 151 152X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, 153 const X86Subtarget &STI, 154 const X86RegisterBankInfo &RBI) 155 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), 156 TRI(*STI.getRegisterInfo()), RBI(RBI), 157#define GET_GLOBALISEL_PREDICATES_INIT 158#include "X86GenGlobalISel.inc" 159#undef GET_GLOBALISEL_PREDICATES_INIT 160#define GET_GLOBALISEL_TEMPORARIES_INIT 161#include "X86GenGlobalISel.inc" 162#undef GET_GLOBALISEL_TEMPORARIES_INIT 163{ 164} 165 166// FIXME: This should be target-independent, inferred from the types declared 167// for each class in the bank. 168const TargetRegisterClass * 169X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { 170 if (RB.getID() == X86::GPRRegBankID) { 171 if (Ty.getSizeInBits() <= 8) 172 return &X86::GR8RegClass; 173 if (Ty.getSizeInBits() == 16) 174 return &X86::GR16RegClass; 175 if (Ty.getSizeInBits() == 32) 176 return &X86::GR32RegClass; 177 if (Ty.getSizeInBits() == 64) 178 return &X86::GR64RegClass; 179 } 180 if (RB.getID() == X86::VECRRegBankID) { 181 if (Ty.getSizeInBits() == 32) 182 return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 183 if (Ty.getSizeInBits() == 64) 184 return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 185 if (Ty.getSizeInBits() == 128) 186 return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass; 187 if (Ty.getSizeInBits() == 256) 188 return STI.hasAVX512() ? 
&X86::VR256XRegClass : &X86::VR256RegClass; 189 if (Ty.getSizeInBits() == 512) 190 return &X86::VR512RegClass; 191 } 192 193 llvm_unreachable("Unknown RegBank!"); 194} 195 196const TargetRegisterClass * 197X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, 198 MachineRegisterInfo &MRI) const { 199 const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); 200 return getRegClass(Ty, RegBank); 201} 202 203static unsigned getSubRegIndex(const TargetRegisterClass *RC) { 204 unsigned SubIdx = X86::NoSubRegister; 205 if (RC == &X86::GR32RegClass) { 206 SubIdx = X86::sub_32bit; 207 } else if (RC == &X86::GR16RegClass) { 208 SubIdx = X86::sub_16bit; 209 } else if (RC == &X86::GR8RegClass) { 210 SubIdx = X86::sub_8bit; 211 } 212 213 return SubIdx; 214} 215 216static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { 217 assert(Register::isPhysicalRegister(Reg)); 218 if (X86::GR64RegClass.contains(Reg)) 219 return &X86::GR64RegClass; 220 if (X86::GR32RegClass.contains(Reg)) 221 return &X86::GR32RegClass; 222 if (X86::GR16RegClass.contains(Reg)) 223 return &X86::GR16RegClass; 224 if (X86::GR8RegClass.contains(Reg)) 225 return &X86::GR8RegClass; 226 227 llvm_unreachable("Unknown RegClass for PhysReg!"); 228} 229 230// Set X86 Opcode and constrain DestReg. 
231bool X86InstructionSelector::selectCopy(MachineInstr &I, 232 MachineRegisterInfo &MRI) const { 233 Register DstReg = I.getOperand(0).getReg(); 234 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 235 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 236 237 Register SrcReg = I.getOperand(1).getReg(); 238 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 239 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 240 241 if (Register::isPhysicalRegister(DstReg)) { 242 assert(I.isCopy() && "Generic operators do not allow physical registers"); 243 244 if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID && 245 DstRegBank.getID() == X86::GPRRegBankID) { 246 247 const TargetRegisterClass *SrcRC = 248 getRegClass(MRI.getType(SrcReg), SrcRegBank); 249 const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg); 250 251 if (SrcRC != DstRC) { 252 // This case can be generated by ABI lowering, performe anyext 253 Register ExtSrc = MRI.createVirtualRegister(DstRC); 254 BuildMI(*I.getParent(), I, I.getDebugLoc(), 255 TII.get(TargetOpcode::SUBREG_TO_REG)) 256 .addDef(ExtSrc) 257 .addImm(0) 258 .addReg(SrcReg) 259 .addImm(getSubRegIndex(SrcRC)); 260 261 I.getOperand(1).setReg(ExtSrc); 262 } 263 } 264 265 return true; 266 } 267 268 assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) && 269 "No phys reg on generic operators"); 270 assert((DstSize == SrcSize || 271 // Copies are a mean to setup initial types, the number of 272 // bits may not exactly match. 
273 (Register::isPhysicalRegister(SrcReg) && 274 DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) && 275 "Copy with different width?!"); 276 277 const TargetRegisterClass *DstRC = 278 getRegClass(MRI.getType(DstReg), DstRegBank); 279 280 if (SrcRegBank.getID() == X86::GPRRegBankID && 281 DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize && 282 Register::isPhysicalRegister(SrcReg)) { 283 // Change the physical register to performe truncate. 284 285 const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg); 286 287 if (DstRC != SrcRC) { 288 I.getOperand(1).setSubReg(getSubRegIndex(DstRC)); 289 I.getOperand(1).substPhysReg(SrcReg, TRI); 290 } 291 } 292 293 // No need to constrain SrcReg. It will get constrained when 294 // we hit another of its use or its defs. 295 // Copies do not have constraints. 296 const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); 297 if (!OldRC || !DstRC->hasSubClassEq(OldRC)) { 298 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 299 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 300 << " operand\n"); 301 return false; 302 } 303 } 304 I.setDesc(TII.get(X86::COPY)); 305 return true; 306} 307 308bool X86InstructionSelector::select(MachineInstr &I) { 309 assert(I.getParent() && "Instruction should be in a basic block!"); 310 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 311 312 MachineBasicBlock &MBB = *I.getParent(); 313 MachineFunction &MF = *MBB.getParent(); 314 MachineRegisterInfo &MRI = MF.getRegInfo(); 315 316 unsigned Opcode = I.getOpcode(); 317 if (!isPreISelGenericOpcode(Opcode)) { 318 // Certain non-generic instructions also need some special handling. 
319 320 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 321 return false; 322 323 if (I.isCopy()) 324 return selectCopy(I, MRI); 325 326 return true; 327 } 328 329 assert(I.getNumOperands() == I.getNumExplicitOperands() && 330 "Generic instruction has unexpected implicit operands\n"); 331 332 if (selectImpl(I, *CoverageInfo)) 333 return true; 334 335 LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); 336 337 // TODO: This should be implemented by tblgen. 338 switch (I.getOpcode()) { 339 default: 340 return false; 341 case TargetOpcode::G_STORE: 342 case TargetOpcode::G_LOAD: 343 return selectLoadStoreOp(I, MRI, MF); 344 case TargetOpcode::G_PTR_ADD: 345 case TargetOpcode::G_FRAME_INDEX: 346 return selectFrameIndexOrGep(I, MRI, MF); 347 case TargetOpcode::G_GLOBAL_VALUE: 348 return selectGlobalValue(I, MRI, MF); 349 case TargetOpcode::G_CONSTANT: 350 return selectConstant(I, MRI, MF); 351 case TargetOpcode::G_FCONSTANT: 352 return materializeFP(I, MRI, MF); 353 case TargetOpcode::G_PTRTOINT: 354 case TargetOpcode::G_TRUNC: 355 return selectTruncOrPtrToInt(I, MRI, MF); 356 case TargetOpcode::G_INTTOPTR: 357 return selectCopy(I, MRI); 358 case TargetOpcode::G_ZEXT: 359 return selectZext(I, MRI, MF); 360 case TargetOpcode::G_ANYEXT: 361 return selectAnyext(I, MRI, MF); 362 case TargetOpcode::G_ICMP: 363 return selectCmp(I, MRI, MF); 364 case TargetOpcode::G_FCMP: 365 return selectFCmp(I, MRI, MF); 366 case TargetOpcode::G_UADDE: 367 return selectUadde(I, MRI, MF); 368 case TargetOpcode::G_UNMERGE_VALUES: 369 return selectUnmergeValues(I, MRI, MF); 370 case TargetOpcode::G_MERGE_VALUES: 371 case TargetOpcode::G_CONCAT_VECTORS: 372 return selectMergeValues(I, MRI, MF); 373 case TargetOpcode::G_EXTRACT: 374 return selectExtract(I, MRI, MF); 375 case TargetOpcode::G_INSERT: 376 return selectInsert(I, MRI, MF); 377 case TargetOpcode::G_BRCOND: 378 return selectCondBranch(I, MRI, MF); 379 case TargetOpcode::G_IMPLICIT_DEF: 380 case TargetOpcode::G_PHI: 381 
return selectImplicitDefOrPHI(I, MRI); 382 case TargetOpcode::G_SDIV: 383 case TargetOpcode::G_UDIV: 384 case TargetOpcode::G_SREM: 385 case TargetOpcode::G_UREM: 386 return selectDivRem(I, MRI, MF); 387 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 388 return selectIntrinsicWSideEffects(I, MRI, MF); 389 } 390 391 return false; 392} 393 394unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, 395 const RegisterBank &RB, 396 unsigned Opc, 397 uint64_t Alignment) const { 398 bool Isload = (Opc == TargetOpcode::G_LOAD); 399 bool HasAVX = STI.hasAVX(); 400 bool HasAVX512 = STI.hasAVX512(); 401 bool HasVLX = STI.hasVLX(); 402 403 if (Ty == LLT::scalar(8)) { 404 if (X86::GPRRegBankID == RB.getID()) 405 return Isload ? X86::MOV8rm : X86::MOV8mr; 406 } else if (Ty == LLT::scalar(16)) { 407 if (X86::GPRRegBankID == RB.getID()) 408 return Isload ? X86::MOV16rm : X86::MOV16mr; 409 } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) { 410 if (X86::GPRRegBankID == RB.getID()) 411 return Isload ? X86::MOV32rm : X86::MOV32mr; 412 if (X86::VECRRegBankID == RB.getID()) 413 return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt : 414 HasAVX ? X86::VMOVSSrm_alt : 415 X86::MOVSSrm_alt) 416 : (HasAVX512 ? X86::VMOVSSZmr : 417 HasAVX ? X86::VMOVSSmr : 418 X86::MOVSSmr); 419 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { 420 if (X86::GPRRegBankID == RB.getID()) 421 return Isload ? X86::MOV64rm : X86::MOV64mr; 422 if (X86::VECRRegBankID == RB.getID()) 423 return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt : 424 HasAVX ? X86::VMOVSDrm_alt : 425 X86::MOVSDrm_alt) 426 : (HasAVX512 ? X86::VMOVSDZmr : 427 HasAVX ? X86::VMOVSDmr : 428 X86::MOVSDmr); 429 } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { 430 if (Alignment >= 16) 431 return Isload ? (HasVLX ? X86::VMOVAPSZ128rm 432 : HasAVX512 433 ? X86::VMOVAPSZ128rm_NOVLX 434 : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) 435 : (HasVLX ? X86::VMOVAPSZ128mr 436 : HasAVX512 437 ? 
X86::VMOVAPSZ128mr_NOVLX 438 : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); 439 else 440 return Isload ? (HasVLX ? X86::VMOVUPSZ128rm 441 : HasAVX512 442 ? X86::VMOVUPSZ128rm_NOVLX 443 : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) 444 : (HasVLX ? X86::VMOVUPSZ128mr 445 : HasAVX512 446 ? X86::VMOVUPSZ128mr_NOVLX 447 : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); 448 } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { 449 if (Alignment >= 32) 450 return Isload ? (HasVLX ? X86::VMOVAPSZ256rm 451 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX 452 : X86::VMOVAPSYrm) 453 : (HasVLX ? X86::VMOVAPSZ256mr 454 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX 455 : X86::VMOVAPSYmr); 456 else 457 return Isload ? (HasVLX ? X86::VMOVUPSZ256rm 458 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX 459 : X86::VMOVUPSYrm) 460 : (HasVLX ? X86::VMOVUPSZ256mr 461 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX 462 : X86::VMOVUPSYmr); 463 } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { 464 if (Alignment >= 64) 465 return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 466 else 467 return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; 468 } 469 return Opc; 470} 471 472// Fill in an address from the given instruction. 473static void X86SelectAddress(const MachineInstr &I, 474 const MachineRegisterInfo &MRI, 475 X86AddressMode &AM) { 476 assert(I.getOperand(0).isReg() && "unsupported opperand."); 477 assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && 478 "unsupported type."); 479 480 if (I.getOpcode() == TargetOpcode::G_PTR_ADD) { 481 if (auto COff = getConstantVRegVal(I.getOperand(2).getReg(), MRI)) { 482 int64_t Imm = *COff; 483 if (isInt<32>(Imm)) { // Check for displacement overflow. 484 AM.Disp = static_cast<int32_t>(Imm); 485 AM.Base.Reg = I.getOperand(1).getReg(); 486 return; 487 } 488 } 489 } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) { 490 AM.Base.FrameIndex = I.getOperand(1).getIndex(); 491 AM.BaseType = X86AddressMode::FrameIndexBase; 492 return; 493 } 494 495 // Default behavior. 
496 AM.Base.Reg = I.getOperand(0).getReg(); 497} 498 499bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, 500 MachineRegisterInfo &MRI, 501 MachineFunction &MF) const { 502 unsigned Opc = I.getOpcode(); 503 504 assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) && 505 "unexpected instruction"); 506 507 const Register DefReg = I.getOperand(0).getReg(); 508 LLT Ty = MRI.getType(DefReg); 509 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 510 511 assert(I.hasOneMemOperand()); 512 auto &MemOp = **I.memoperands_begin(); 513 if (MemOp.isAtomic()) { 514 // Note: for unordered operations, we rely on the fact the appropriate MMO 515 // is already on the instruction we're mutating, and thus we don't need to 516 // make any changes. So long as we select an opcode which is capable of 517 // loading or storing the appropriate size atomically, the rest of the 518 // backend is required to respect the MMO state. 519 if (!MemOp.isUnordered()) { 520 LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n"); 521 return false; 522 } 523 if (MemOp.getAlignment() < Ty.getSizeInBits()/8) { 524 LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n"); 525 return false; 526 } 527 } 528 529 unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment()); 530 if (NewOpc == Opc) 531 return false; 532 533 X86AddressMode AM; 534 X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM); 535 536 I.setDesc(TII.get(NewOpc)); 537 MachineInstrBuilder MIB(MF, I); 538 if (Opc == TargetOpcode::G_LOAD) { 539 I.RemoveOperand(1); 540 addFullAddress(MIB, AM); 541 } else { 542 // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) 543 I.RemoveOperand(1); 544 I.RemoveOperand(0); 545 addFullAddress(MIB, AM).addUse(DefReg); 546 } 547 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 548} 549 550static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { 551 if (Ty == LLT::pointer(0, 64)) 552 return X86::LEA64r; 553 else if (Ty == 
LLT::pointer(0, 32)) 554 return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; 555 else 556 llvm_unreachable("Can't get LEA opcode. Unsupported type."); 557} 558 559bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, 560 MachineRegisterInfo &MRI, 561 MachineFunction &MF) const { 562 unsigned Opc = I.getOpcode(); 563 564 assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) && 565 "unexpected instruction"); 566 567 const Register DefReg = I.getOperand(0).getReg(); 568 LLT Ty = MRI.getType(DefReg); 569 570 // Use LEA to calculate frame index and GEP 571 unsigned NewOpc = getLeaOP(Ty, STI); 572 I.setDesc(TII.get(NewOpc)); 573 MachineInstrBuilder MIB(MF, I); 574 575 if (Opc == TargetOpcode::G_FRAME_INDEX) { 576 addOffset(MIB, 0); 577 } else { 578 MachineOperand &InxOp = I.getOperand(2); 579 I.addOperand(InxOp); // set IndexReg 580 InxOp.ChangeToImmediate(1); // set Scale 581 MIB.addImm(0).addReg(0); 582 } 583 584 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 585} 586 587bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, 588 MachineRegisterInfo &MRI, 589 MachineFunction &MF) const { 590 assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) && 591 "unexpected instruction"); 592 593 auto GV = I.getOperand(1).getGlobal(); 594 if (GV->isThreadLocal()) { 595 return false; // TODO: we don't support TLS yet. 596 } 597 598 // Can't handle alternate code models yet. 599 if (TM.getCodeModel() != CodeModel::Small) 600 return false; 601 602 X86AddressMode AM; 603 AM.GV = GV; 604 AM.GVOpFlags = STI.classifyGlobalReference(GV); 605 606 // TODO: The ABI requires an extra load. not supported yet. 607 if (isGlobalStubReference(AM.GVOpFlags)) 608 return false; 609 610 // TODO: This reference is relative to the pic base. not supported yet. 611 if (isGlobalRelativeToPICBase(AM.GVOpFlags)) 612 return false; 613 614 if (STI.isPICStyleRIPRel()) { 615 // Use rip-relative addressing. 
616 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 617 AM.Base.Reg = X86::RIP; 618 } 619 620 const Register DefReg = I.getOperand(0).getReg(); 621 LLT Ty = MRI.getType(DefReg); 622 unsigned NewOpc = getLeaOP(Ty, STI); 623 624 I.setDesc(TII.get(NewOpc)); 625 MachineInstrBuilder MIB(MF, I); 626 627 I.RemoveOperand(1); 628 addFullAddress(MIB, AM); 629 630 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 631} 632 633bool X86InstructionSelector::selectConstant(MachineInstr &I, 634 MachineRegisterInfo &MRI, 635 MachineFunction &MF) const { 636 assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && 637 "unexpected instruction"); 638 639 const Register DefReg = I.getOperand(0).getReg(); 640 LLT Ty = MRI.getType(DefReg); 641 642 if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) 643 return false; 644 645 uint64_t Val = 0; 646 if (I.getOperand(1).isCImm()) { 647 Val = I.getOperand(1).getCImm()->getZExtValue(); 648 I.getOperand(1).ChangeToImmediate(Val); 649 } else if (I.getOperand(1).isImm()) { 650 Val = I.getOperand(1).getImm(); 651 } else 652 llvm_unreachable("Unsupported operand type."); 653 654 unsigned NewOpc; 655 switch (Ty.getSizeInBits()) { 656 case 8: 657 NewOpc = X86::MOV8ri; 658 break; 659 case 16: 660 NewOpc = X86::MOV16ri; 661 break; 662 case 32: 663 NewOpc = X86::MOV32ri; 664 break; 665 case 64: 666 // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used 667 if (isInt<32>(Val)) 668 NewOpc = X86::MOV64ri32; 669 else 670 NewOpc = X86::MOV64ri; 671 break; 672 default: 673 llvm_unreachable("Can't select G_CONSTANT, unsupported type."); 674 } 675 676 I.setDesc(TII.get(NewOpc)); 677 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 678} 679 680// Helper function for selectTruncOrPtrToInt and selectAnyext. 681// Returns true if DstRC lives on a floating register class and 682// SrcRC lives on a 128-bit vector class. 
683static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC, 684 const TargetRegisterClass *SrcRC) { 685 return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass || 686 DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) && 687 (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass); 688} 689 690bool X86InstructionSelector::selectTurnIntoCOPY( 691 MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg, 692 const TargetRegisterClass *DstRC, const unsigned SrcReg, 693 const TargetRegisterClass *SrcRC) const { 694 695 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 696 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 697 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 698 << " operand\n"); 699 return false; 700 } 701 I.setDesc(TII.get(X86::COPY)); 702 return true; 703} 704 705bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I, 706 MachineRegisterInfo &MRI, 707 MachineFunction &MF) const { 708 assert((I.getOpcode() == TargetOpcode::G_TRUNC || 709 I.getOpcode() == TargetOpcode::G_PTRTOINT) && 710 "unexpected instruction"); 711 712 const Register DstReg = I.getOperand(0).getReg(); 713 const Register SrcReg = I.getOperand(1).getReg(); 714 715 const LLT DstTy = MRI.getType(DstReg); 716 const LLT SrcTy = MRI.getType(SrcReg); 717 718 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 719 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 720 721 if (DstRB.getID() != SrcRB.getID()) { 722 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) 723 << " input/output on different banks\n"); 724 return false; 725 } 726 727 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 728 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 729 730 if (!DstRC || !SrcRC) 731 return false; 732 733 // If that's truncation of the value that lives on the vector class and goes 734 // into the floating class, just replace it with copy, as we are able to 735 // 
select it as a regular move. 736 if (canTurnIntoCOPY(DstRC, SrcRC)) 737 return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC); 738 739 if (DstRB.getID() != X86::GPRRegBankID) 740 return false; 741 742 unsigned SubIdx; 743 if (DstRC == SrcRC) { 744 // Nothing to be done 745 SubIdx = X86::NoSubRegister; 746 } else if (DstRC == &X86::GR32RegClass) { 747 SubIdx = X86::sub_32bit; 748 } else if (DstRC == &X86::GR16RegClass) { 749 SubIdx = X86::sub_16bit; 750 } else if (DstRC == &X86::GR8RegClass) { 751 SubIdx = X86::sub_8bit; 752 } else { 753 return false; 754 } 755 756 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); 757 758 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 759 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 760 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 761 << "\n"); 762 return false; 763 } 764 765 I.getOperand(1).setSubReg(SubIdx); 766 767 I.setDesc(TII.get(X86::COPY)); 768 return true; 769} 770 771bool X86InstructionSelector::selectZext(MachineInstr &I, 772 MachineRegisterInfo &MRI, 773 MachineFunction &MF) const { 774 assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); 775 776 const Register DstReg = I.getOperand(0).getReg(); 777 const Register SrcReg = I.getOperand(1).getReg(); 778 779 const LLT DstTy = MRI.getType(DstReg); 780 const LLT SrcTy = MRI.getType(SrcReg); 781 782 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) && 783 "8=>32 Zext is handled by tablegen"); 784 assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) && 785 "16=>32 Zext is handled by tablegen"); 786 787 const static struct ZextEntry { 788 LLT SrcTy; 789 LLT DstTy; 790 unsigned MovOp; 791 bool NeedSubregToReg; 792 } OpTable[] = { 793 {LLT::scalar(8), LLT::scalar(16), X86::MOVZX16rr8, false}, // i8 => i16 794 {LLT::scalar(8), LLT::scalar(64), X86::MOVZX32rr8, true}, // i8 => i64 795 {LLT::scalar(16), LLT::scalar(64), X86::MOVZX32rr16, true}, // i16 => i64 796 {LLT::scalar(32), 
LLT::scalar(64), 0, true} // i32 => i64 797 }; 798 799 auto ZextEntryIt = 800 std::find_if(std::begin(OpTable), std::end(OpTable), 801 [SrcTy, DstTy](const ZextEntry &El) { 802 return El.DstTy == DstTy && El.SrcTy == SrcTy; 803 }); 804 805 // Here we try to select Zext into a MOVZ and/or SUBREG_TO_REG instruction. 806 if (ZextEntryIt != std::end(OpTable)) { 807 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 808 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 809 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 810 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 811 812 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 813 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 814 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 815 << " operand\n"); 816 return false; 817 } 818 819 unsigned TransitRegTo = DstReg; 820 unsigned TransitRegFrom = SrcReg; 821 if (ZextEntryIt->MovOp) { 822 // If we select Zext into MOVZ + SUBREG_TO_REG, we need to have 823 // a transit register in between: create it here. 
824 if (ZextEntryIt->NeedSubregToReg) { 825 TransitRegFrom = MRI.createVirtualRegister( 826 getRegClass(LLT::scalar(32), DstReg, MRI)); 827 TransitRegTo = TransitRegFrom; 828 } 829 830 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ZextEntryIt->MovOp)) 831 .addDef(TransitRegTo) 832 .addReg(SrcReg); 833 } 834 if (ZextEntryIt->NeedSubregToReg) { 835 BuildMI(*I.getParent(), I, I.getDebugLoc(), 836 TII.get(TargetOpcode::SUBREG_TO_REG)) 837 .addDef(DstReg) 838 .addImm(0) 839 .addReg(TransitRegFrom) 840 .addImm(X86::sub_32bit); 841 } 842 I.eraseFromParent(); 843 return true; 844 } 845 846 if (SrcTy != LLT::scalar(1)) 847 return false; 848 849 unsigned AndOpc; 850 if (DstTy == LLT::scalar(8)) 851 AndOpc = X86::AND8ri; 852 else if (DstTy == LLT::scalar(16)) 853 AndOpc = X86::AND16ri8; 854 else if (DstTy == LLT::scalar(32)) 855 AndOpc = X86::AND32ri8; 856 else if (DstTy == LLT::scalar(64)) 857 AndOpc = X86::AND64ri8; 858 else 859 return false; 860 861 unsigned DefReg = SrcReg; 862 if (DstTy != LLT::scalar(8)) { 863 DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); 864 BuildMI(*I.getParent(), I, I.getDebugLoc(), 865 TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) 866 .addImm(0) 867 .addReg(SrcReg) 868 .addImm(X86::sub_8bit); 869 } 870 871 MachineInstr &AndInst = 872 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) 873 .addReg(DefReg) 874 .addImm(1); 875 876 constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); 877 878 I.eraseFromParent(); 879 return true; 880} 881 882bool X86InstructionSelector::selectAnyext(MachineInstr &I, 883 MachineRegisterInfo &MRI, 884 MachineFunction &MF) const { 885 assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); 886 887 const Register DstReg = I.getOperand(0).getReg(); 888 const Register SrcReg = I.getOperand(1).getReg(); 889 890 const LLT DstTy = MRI.getType(DstReg); 891 const LLT SrcTy = MRI.getType(SrcReg); 892 893 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, 
MRI, TRI); 894 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 895 896 assert(DstRB.getID() == SrcRB.getID() && 897 "G_ANYEXT input/output on different banks\n"); 898 899 assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() && 900 "G_ANYEXT incorrect operand size"); 901 902 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 903 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 904 905 // If that's ANY_EXT of the value that lives on the floating class and goes 906 // into the vector class, just replace it with copy, as we are able to select 907 // it as a regular move. 908 if (canTurnIntoCOPY(SrcRC, DstRC)) 909 return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC); 910 911 if (DstRB.getID() != X86::GPRRegBankID) 912 return false; 913 914 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 915 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 916 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 917 << " operand\n"); 918 return false; 919 } 920 921 if (SrcRC == DstRC) { 922 I.setDesc(TII.get(X86::COPY)); 923 return true; 924 } 925 926 BuildMI(*I.getParent(), I, I.getDebugLoc(), 927 TII.get(TargetOpcode::SUBREG_TO_REG)) 928 .addDef(DstReg) 929 .addImm(0) 930 .addReg(SrcReg) 931 .addImm(getSubRegIndex(SrcRC)); 932 933 I.eraseFromParent(); 934 return true; 935} 936 937bool X86InstructionSelector::selectCmp(MachineInstr &I, 938 MachineRegisterInfo &MRI, 939 MachineFunction &MF) const { 940 assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction"); 941 942 X86::CondCode CC; 943 bool SwapArgs; 944 std::tie(CC, SwapArgs) = X86::getX86ConditionCode( 945 (CmpInst::Predicate)I.getOperand(1).getPredicate()); 946 947 Register LHS = I.getOperand(2).getReg(); 948 Register RHS = I.getOperand(3).getReg(); 949 950 if (SwapArgs) 951 std::swap(LHS, RHS); 952 953 unsigned OpCmp; 954 LLT Ty = MRI.getType(LHS); 955 956 switch (Ty.getSizeInBits()) { 957 default: 958 return false; 959 case 8: 
960 OpCmp = X86::CMP8rr; 961 break; 962 case 16: 963 OpCmp = X86::CMP16rr; 964 break; 965 case 32: 966 OpCmp = X86::CMP32rr; 967 break; 968 case 64: 969 OpCmp = X86::CMP64rr; 970 break; 971 } 972 973 MachineInstr &CmpInst = 974 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 975 .addReg(LHS) 976 .addReg(RHS); 977 978 MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 979 TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC); 980 981 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 982 constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI); 983 984 I.eraseFromParent(); 985 return true; 986} 987 988bool X86InstructionSelector::selectFCmp(MachineInstr &I, 989 MachineRegisterInfo &MRI, 990 MachineFunction &MF) const { 991 assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction"); 992 993 Register LhsReg = I.getOperand(2).getReg(); 994 Register RhsReg = I.getOperand(3).getReg(); 995 CmpInst::Predicate Predicate = 996 (CmpInst::Predicate)I.getOperand(1).getPredicate(); 997 998 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 999 static const uint16_t SETFOpcTable[2][3] = { 1000 {X86::COND_E, X86::COND_NP, X86::AND8rr}, 1001 {X86::COND_NE, X86::COND_P, X86::OR8rr}}; 1002 const uint16_t *SETFOpc = nullptr; 1003 switch (Predicate) { 1004 default: 1005 break; 1006 case CmpInst::FCMP_OEQ: 1007 SETFOpc = &SETFOpcTable[0][0]; 1008 break; 1009 case CmpInst::FCMP_UNE: 1010 SETFOpc = &SETFOpcTable[1][0]; 1011 break; 1012 } 1013 1014 // Compute the opcode for the CMP instruction. 
1015 unsigned OpCmp; 1016 LLT Ty = MRI.getType(LhsReg); 1017 switch (Ty.getSizeInBits()) { 1018 default: 1019 return false; 1020 case 32: 1021 OpCmp = X86::UCOMISSrr; 1022 break; 1023 case 64: 1024 OpCmp = X86::UCOMISDrr; 1025 break; 1026 } 1027 1028 Register ResultReg = I.getOperand(0).getReg(); 1029 RBI.constrainGenericRegister( 1030 ResultReg, 1031 *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI); 1032 if (SETFOpc) { 1033 MachineInstr &CmpInst = 1034 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1035 .addReg(LhsReg) 1036 .addReg(RhsReg); 1037 1038 Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); 1039 Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); 1040 MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1041 TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); 1042 MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1043 TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]); 1044 MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1045 TII.get(SETFOpc[2]), ResultReg) 1046 .addReg(FlagReg1) 1047 .addReg(FlagReg2); 1048 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1049 constrainSelectedInstRegOperands(Set1, TII, TRI, RBI); 1050 constrainSelectedInstRegOperands(Set2, TII, TRI, RBI); 1051 constrainSelectedInstRegOperands(Set3, TII, TRI, RBI); 1052 1053 I.eraseFromParent(); 1054 return true; 1055 } 1056 1057 X86::CondCode CC; 1058 bool SwapArgs; 1059 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); 1060 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1061 1062 if (SwapArgs) 1063 std::swap(LhsReg, RhsReg); 1064 1065 // Emit a compare of LHS/RHS. 
1066 MachineInstr &CmpInst = 1067 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1068 .addReg(LhsReg) 1069 .addReg(RhsReg); 1070 1071 MachineInstr &Set = 1072 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC); 1073 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1074 constrainSelectedInstRegOperands(Set, TII, TRI, RBI); 1075 I.eraseFromParent(); 1076 return true; 1077} 1078 1079bool X86InstructionSelector::selectUadde(MachineInstr &I, 1080 MachineRegisterInfo &MRI, 1081 MachineFunction &MF) const { 1082 assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction"); 1083 1084 const Register DstReg = I.getOperand(0).getReg(); 1085 const Register CarryOutReg = I.getOperand(1).getReg(); 1086 const Register Op0Reg = I.getOperand(2).getReg(); 1087 const Register Op1Reg = I.getOperand(3).getReg(); 1088 Register CarryInReg = I.getOperand(4).getReg(); 1089 1090 const LLT DstTy = MRI.getType(DstReg); 1091 1092 if (DstTy != LLT::scalar(32)) 1093 return false; 1094 1095 // find CarryIn def instruction. 1096 MachineInstr *Def = MRI.getVRegDef(CarryInReg); 1097 while (Def->getOpcode() == TargetOpcode::G_TRUNC) { 1098 CarryInReg = Def->getOperand(1).getReg(); 1099 Def = MRI.getVRegDef(CarryInReg); 1100 } 1101 1102 unsigned Opcode; 1103 if (Def->getOpcode() == TargetOpcode::G_UADDE) { 1104 // carry set by prev ADD. 1105 1106 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS) 1107 .addReg(CarryInReg); 1108 1109 if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI)) 1110 return false; 1111 1112 Opcode = X86::ADC32rr; 1113 } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) { 1114 // carry is constant, support only 0. 
1115 if (*val != 0) 1116 return false; 1117 1118 Opcode = X86::ADD32rr; 1119 } else 1120 return false; 1121 1122 MachineInstr &AddInst = 1123 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg) 1124 .addReg(Op0Reg) 1125 .addReg(Op1Reg); 1126 1127 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) 1128 .addReg(X86::EFLAGS); 1129 1130 if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) || 1131 !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI)) 1132 return false; 1133 1134 I.eraseFromParent(); 1135 return true; 1136} 1137 1138bool X86InstructionSelector::selectExtract(MachineInstr &I, 1139 MachineRegisterInfo &MRI, 1140 MachineFunction &MF) const { 1141 assert((I.getOpcode() == TargetOpcode::G_EXTRACT) && 1142 "unexpected instruction"); 1143 1144 const Register DstReg = I.getOperand(0).getReg(); 1145 const Register SrcReg = I.getOperand(1).getReg(); 1146 int64_t Index = I.getOperand(2).getImm(); 1147 1148 const LLT DstTy = MRI.getType(DstReg); 1149 const LLT SrcTy = MRI.getType(SrcReg); 1150 1151 // Meanwile handle vector type only. 1152 if (!DstTy.isVector()) 1153 return false; 1154 1155 if (Index % DstTy.getSizeInBits() != 0) 1156 return false; // Not extract subvector. 1157 1158 if (Index == 0) { 1159 // Replace by extract subreg copy. 
1160 if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF)) 1161 return false; 1162 1163 I.eraseFromParent(); 1164 return true; 1165 } 1166 1167 bool HasAVX = STI.hasAVX(); 1168 bool HasAVX512 = STI.hasAVX512(); 1169 bool HasVLX = STI.hasVLX(); 1170 1171 if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) { 1172 if (HasVLX) 1173 I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr)); 1174 else if (HasAVX) 1175 I.setDesc(TII.get(X86::VEXTRACTF128rr)); 1176 else 1177 return false; 1178 } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) { 1179 if (DstTy.getSizeInBits() == 128) 1180 I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr)); 1181 else if (DstTy.getSizeInBits() == 256) 1182 I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr)); 1183 else 1184 return false; 1185 } else 1186 return false; 1187 1188 // Convert to X86 VEXTRACT immediate. 1189 Index = Index / DstTy.getSizeInBits(); 1190 I.getOperand(2).setImm(Index); 1191 1192 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1193} 1194 1195bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg, 1196 MachineInstr &I, 1197 MachineRegisterInfo &MRI, 1198 MachineFunction &MF) const { 1199 const LLT DstTy = MRI.getType(DstReg); 1200 const LLT SrcTy = MRI.getType(SrcReg); 1201 unsigned SubIdx = X86::NoSubRegister; 1202 1203 if (!DstTy.isVector() || !SrcTy.isVector()) 1204 return false; 1205 1206 assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() && 1207 "Incorrect Src/Dst register size"); 1208 1209 if (DstTy.getSizeInBits() == 128) 1210 SubIdx = X86::sub_xmm; 1211 else if (DstTy.getSizeInBits() == 256) 1212 SubIdx = X86::sub_ymm; 1213 else 1214 return false; 1215 1216 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); 1217 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); 1218 1219 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); 1220 1221 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 1222 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1223 
LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n"); 1224 return false; 1225 } 1226 1227 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg) 1228 .addReg(SrcReg, 0, SubIdx); 1229 1230 return true; 1231} 1232 1233bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, 1234 MachineInstr &I, 1235 MachineRegisterInfo &MRI, 1236 MachineFunction &MF) const { 1237 const LLT DstTy = MRI.getType(DstReg); 1238 const LLT SrcTy = MRI.getType(SrcReg); 1239 unsigned SubIdx = X86::NoSubRegister; 1240 1241 // TODO: support scalar types 1242 if (!DstTy.isVector() || !SrcTy.isVector()) 1243 return false; 1244 1245 assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() && 1246 "Incorrect Src/Dst register size"); 1247 1248 if (SrcTy.getSizeInBits() == 128) 1249 SubIdx = X86::sub_xmm; 1250 else if (SrcTy.getSizeInBits() == 256) 1251 SubIdx = X86::sub_ymm; 1252 else 1253 return false; 1254 1255 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); 1256 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); 1257 1258 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 1259 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1260 LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n"); 1261 return false; 1262 } 1263 1264 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY)) 1265 .addReg(DstReg, RegState::DefineNoRead, SubIdx) 1266 .addReg(SrcReg); 1267 1268 return true; 1269} 1270 1271bool X86InstructionSelector::selectInsert(MachineInstr &I, 1272 MachineRegisterInfo &MRI, 1273 MachineFunction &MF) const { 1274 assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction"); 1275 1276 const Register DstReg = I.getOperand(0).getReg(); 1277 const Register SrcReg = I.getOperand(1).getReg(); 1278 const Register InsertReg = I.getOperand(2).getReg(); 1279 int64_t Index = I.getOperand(3).getImm(); 1280 1281 const LLT DstTy = MRI.getType(DstReg); 1282 const LLT InsertRegTy = 
MRI.getType(InsertReg); 1283 1284 // Meanwile handle vector type only. 1285 if (!DstTy.isVector()) 1286 return false; 1287 1288 if (Index % InsertRegTy.getSizeInBits() != 0) 1289 return false; // Not insert subvector. 1290 1291 if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) { 1292 // Replace by subreg copy. 1293 if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF)) 1294 return false; 1295 1296 I.eraseFromParent(); 1297 return true; 1298 } 1299 1300 bool HasAVX = STI.hasAVX(); 1301 bool HasAVX512 = STI.hasAVX512(); 1302 bool HasVLX = STI.hasVLX(); 1303 1304 if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) { 1305 if (HasVLX) 1306 I.setDesc(TII.get(X86::VINSERTF32x4Z256rr)); 1307 else if (HasAVX) 1308 I.setDesc(TII.get(X86::VINSERTF128rr)); 1309 else 1310 return false; 1311 } else if (DstTy.getSizeInBits() == 512 && HasAVX512) { 1312 if (InsertRegTy.getSizeInBits() == 128) 1313 I.setDesc(TII.get(X86::VINSERTF32x4Zrr)); 1314 else if (InsertRegTy.getSizeInBits() == 256) 1315 I.setDesc(TII.get(X86::VINSERTF64x4Zrr)); 1316 else 1317 return false; 1318 } else 1319 return false; 1320 1321 // Convert to X86 VINSERT immediate. 1322 Index = Index / InsertRegTy.getSizeInBits(); 1323 1324 I.getOperand(3).setImm(Index); 1325 1326 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1327} 1328 1329bool X86InstructionSelector::selectUnmergeValues( 1330 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1331 assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && 1332 "unexpected instruction"); 1333 1334 // Split to extracts. 
1335 unsigned NumDefs = I.getNumOperands() - 1; 1336 Register SrcReg = I.getOperand(NumDefs).getReg(); 1337 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 1338 1339 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { 1340 MachineInstr &ExtrInst = 1341 *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1342 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) 1343 .addReg(SrcReg) 1344 .addImm(Idx * DefSize); 1345 1346 if (!select(ExtrInst)) 1347 return false; 1348 } 1349 1350 I.eraseFromParent(); 1351 return true; 1352} 1353 1354bool X86InstructionSelector::selectMergeValues( 1355 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1356 assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || 1357 I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && 1358 "unexpected instruction"); 1359 1360 // Split to inserts. 1361 Register DstReg = I.getOperand(0).getReg(); 1362 Register SrcReg0 = I.getOperand(1).getReg(); 1363 1364 const LLT DstTy = MRI.getType(DstReg); 1365 const LLT SrcTy = MRI.getType(SrcReg0); 1366 unsigned SrcSize = SrcTy.getSizeInBits(); 1367 1368 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1369 1370 // For the first src use insertSubReg. 
1371 Register DefReg = MRI.createGenericVirtualRegister(DstTy); 1372 MRI.setRegBank(DefReg, RegBank); 1373 if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) 1374 return false; 1375 1376 for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { 1377 Register Tmp = MRI.createGenericVirtualRegister(DstTy); 1378 MRI.setRegBank(Tmp, RegBank); 1379 1380 MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1381 TII.get(TargetOpcode::G_INSERT), Tmp) 1382 .addReg(DefReg) 1383 .addReg(I.getOperand(Idx).getReg()) 1384 .addImm((Idx - 1) * SrcSize); 1385 1386 DefReg = Tmp; 1387 1388 if (!select(InsertInst)) 1389 return false; 1390 } 1391 1392 MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1393 TII.get(TargetOpcode::COPY), DstReg) 1394 .addReg(DefReg); 1395 1396 if (!select(CopyInst)) 1397 return false; 1398 1399 I.eraseFromParent(); 1400 return true; 1401} 1402 1403bool X86InstructionSelector::selectCondBranch(MachineInstr &I, 1404 MachineRegisterInfo &MRI, 1405 MachineFunction &MF) const { 1406 assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); 1407 1408 const Register CondReg = I.getOperand(0).getReg(); 1409 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1410 1411 MachineInstr &TestInst = 1412 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri)) 1413 .addReg(CondReg) 1414 .addImm(1); 1415 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1)) 1416 .addMBB(DestMBB).addImm(X86::COND_NE); 1417 1418 constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI); 1419 1420 I.eraseFromParent(); 1421 return true; 1422} 1423 1424bool X86InstructionSelector::materializeFP(MachineInstr &I, 1425 MachineRegisterInfo &MRI, 1426 MachineFunction &MF) const { 1427 assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) && 1428 "unexpected instruction"); 1429 1430 // Can't handle alternate code models yet. 
1431 CodeModel::Model CM = TM.getCodeModel(); 1432 if (CM != CodeModel::Small && CM != CodeModel::Large) 1433 return false; 1434 1435 const Register DstReg = I.getOperand(0).getReg(); 1436 const LLT DstTy = MRI.getType(DstReg); 1437 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1438 unsigned Align = DstTy.getSizeInBits(); 1439 const DebugLoc &DbgLoc = I.getDebugLoc(); 1440 1441 unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Align); 1442 1443 // Create the load from the constant pool. 1444 const ConstantFP *CFP = I.getOperand(1).getFPImm(); 1445 unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Align); 1446 MachineInstr *LoadInst = nullptr; 1447 unsigned char OpFlag = STI.classifyLocalReference(nullptr); 1448 1449 if (CM == CodeModel::Large && STI.is64Bit()) { 1450 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 1451 // they cannot be folded into immediate fields. 1452 1453 Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); 1454 BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg) 1455 .addConstantPoolIndex(CPI, 0, OpFlag); 1456 1457 MachineMemOperand *MMO = MF.getMachineMemOperand( 1458 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 1459 MF.getDataLayout().getPointerSize(), Align); 1460 1461 LoadInst = 1462 addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), 1463 AddrReg) 1464 .addMemOperand(MMO); 1465 1466 } else if (CM == CodeModel::Small || !STI.is64Bit()) { 1467 // Handle the case when globals fit in our immediate field. 1468 // This is true for X86-32 always and X86-64 when in -mcmodel=small mode. 1469 1470 // x86-32 PIC requires a PIC base register for constant pools. 1471 unsigned PICBase = 0; 1472 if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) { 1473 // PICBase can be allocated by TII.getGlobalBaseReg(&MF). 1474 // In DAGISEL the code that initialize it generated by the CGBR pass. 
1475 return false; // TODO support the mode. 1476 } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small) 1477 PICBase = X86::RIP; 1478 1479 LoadInst = addConstantPoolReference( 1480 BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase, 1481 OpFlag); 1482 } else 1483 return false; 1484 1485 constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI); 1486 I.eraseFromParent(); 1487 return true; 1488} 1489 1490bool X86InstructionSelector::selectImplicitDefOrPHI( 1491 MachineInstr &I, MachineRegisterInfo &MRI) const { 1492 assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || 1493 I.getOpcode() == TargetOpcode::G_PHI) && 1494 "unexpected instruction"); 1495 1496 Register DstReg = I.getOperand(0).getReg(); 1497 1498 if (!MRI.getRegClassOrNull(DstReg)) { 1499 const LLT DstTy = MRI.getType(DstReg); 1500 const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI); 1501 1502 if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { 1503 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 1504 << " operand\n"); 1505 return false; 1506 } 1507 } 1508 1509 if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF) 1510 I.setDesc(TII.get(X86::IMPLICIT_DEF)); 1511 else 1512 I.setDesc(TII.get(X86::PHI)); 1513 1514 return true; 1515} 1516 1517bool X86InstructionSelector::selectDivRem(MachineInstr &I, 1518 MachineRegisterInfo &MRI, 1519 MachineFunction &MF) const { 1520 // The implementation of this function is taken from X86FastISel. 
1521 assert((I.getOpcode() == TargetOpcode::G_SDIV || 1522 I.getOpcode() == TargetOpcode::G_SREM || 1523 I.getOpcode() == TargetOpcode::G_UDIV || 1524 I.getOpcode() == TargetOpcode::G_UREM) && 1525 "unexpected instruction"); 1526 1527 const Register DstReg = I.getOperand(0).getReg(); 1528 const Register Op1Reg = I.getOperand(1).getReg(); 1529 const Register Op2Reg = I.getOperand(2).getReg(); 1530 1531 const LLT RegTy = MRI.getType(DstReg); 1532 assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) && 1533 "Arguments and return value types must match"); 1534 1535 const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI); 1536 if (!RegRB || RegRB->getID() != X86::GPRRegBankID) 1537 return false; 1538 1539 const static unsigned NumTypes = 4; // i8, i16, i32, i64 1540 const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem 1541 const static bool S = true; // IsSigned 1542 const static bool U = false; // !IsSigned 1543 const static unsigned Copy = TargetOpcode::COPY; 1544 // For the X86 IDIV instruction, in most cases the dividend 1545 // (numerator) must be in a specific register pair highreg:lowreg, 1546 // producing the quotient in lowreg and the remainder in highreg. 1547 // For most data types, to set up the instruction, the dividend is 1548 // copied into lowreg, and lowreg is sign-extended into highreg. The 1549 // exception is i8, where the dividend is defined as a single register rather 1550 // than a register pair, and we therefore directly sign-extend the dividend 1551 // into lowreg, instead of copying, and ignore the highreg. 1552 const static struct DivRemEntry { 1553 // The following portion depends only on the data type. 1554 unsigned SizeInBits; 1555 unsigned LowInReg; // low part of the register pair 1556 unsigned HighInReg; // high part of the register pair 1557 // The following portion depends on both the data type and the operation. 1558 struct DivRemResult { 1559 unsigned OpDivRem; // The specific DIV/IDIV opcode to use. 
1560 unsigned OpSignExtend; // Opcode for sign-extending lowreg into 1561 // highreg, or copying a zero into highreg. 1562 unsigned OpCopy; // Opcode for copying dividend into lowreg, or 1563 // zero/sign-extending into lowreg for i8. 1564 unsigned DivRemResultReg; // Register containing the desired result. 1565 bool IsOpSigned; // Whether to use signed or unsigned form. 1566 } ResultTable[NumOps]; 1567 } OpTable[NumTypes] = { 1568 {8, 1569 X86::AX, 1570 0, 1571 { 1572 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv 1573 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem 1574 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv 1575 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem 1576 }}, // i8 1577 {16, 1578 X86::AX, 1579 X86::DX, 1580 { 1581 {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv 1582 {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem 1583 {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv 1584 {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem 1585 }}, // i16 1586 {32, 1587 X86::EAX, 1588 X86::EDX, 1589 { 1590 {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv 1591 {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem 1592 {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv 1593 {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem 1594 }}, // i32 1595 {64, 1596 X86::RAX, 1597 X86::RDX, 1598 { 1599 {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv 1600 {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem 1601 {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv 1602 {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem 1603 }}, // i64 1604 }; 1605 1606 auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable), 1607 [RegTy](const DivRemEntry &El) { 1608 return El.SizeInBits == RegTy.getSizeInBits(); 1609 }); 1610 if (OpEntryIt == std::end(OpTable)) 1611 return false; 1612 1613 unsigned OpIndex; 1614 switch (I.getOpcode()) { 1615 default: 1616 llvm_unreachable("Unexpected div/rem opcode"); 1617 case 
TargetOpcode::G_SDIV: 1618 OpIndex = 0; 1619 break; 1620 case TargetOpcode::G_SREM: 1621 OpIndex = 1; 1622 break; 1623 case TargetOpcode::G_UDIV: 1624 OpIndex = 2; 1625 break; 1626 case TargetOpcode::G_UREM: 1627 OpIndex = 3; 1628 break; 1629 } 1630 1631 const DivRemEntry &TypeEntry = *OpEntryIt; 1632 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; 1633 1634 const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB); 1635 if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) || 1636 !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) || 1637 !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) { 1638 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 1639 << " operand\n"); 1640 return false; 1641 } 1642 1643 // Move op1 into low-order input register. 1644 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy), 1645 TypeEntry.LowInReg) 1646 .addReg(Op1Reg); 1647 // Zero-extend or sign-extend into high-order input register. 1648 if (OpEntry.OpSignExtend) { 1649 if (OpEntry.IsOpSigned) 1650 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1651 TII.get(OpEntry.OpSignExtend)); 1652 else { 1653 Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass); 1654 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0), 1655 Zero32); 1656 1657 // Copy the zero into the appropriate sub/super/identical physical 1658 // register. Unfortunately the operations needed are not uniform enough 1659 // to fit neatly into the table above. 
1660 if (RegTy.getSizeInBits() == 16) { 1661 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), 1662 TypeEntry.HighInReg) 1663 .addReg(Zero32, 0, X86::sub_16bit); 1664 } else if (RegTy.getSizeInBits() == 32) { 1665 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), 1666 TypeEntry.HighInReg) 1667 .addReg(Zero32); 1668 } else if (RegTy.getSizeInBits() == 64) { 1669 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1670 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 1671 .addImm(0) 1672 .addReg(Zero32) 1673 .addImm(X86::sub_32bit); 1674 } 1675 } 1676 } 1677 // Generate the DIV/IDIV instruction. 1678 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem)) 1679 .addReg(Op2Reg); 1680 // For i8 remainder, we can't reference ah directly, as we'll end 1681 // up with bogus copies like %r9b = COPY %ah. Reference ax 1682 // instead to prevent ah references in a rex instruction. 1683 // 1684 // The current assumption of the fast register allocator is that isel 1685 // won't generate explicit references to the GR8_NOREX registers. If 1686 // the allocator and/or the backend get enhanced to be more robust in 1687 // that regard, this can be, and should be, removed. 1688 if ((I.getOpcode() == Instruction::SRem || 1689 I.getOpcode() == Instruction::URem) && 1690 OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) { 1691 Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); 1692 Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); 1693 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg) 1694 .addReg(X86::AX); 1695 1696 // Shift AX right by 8 bits instead of using AH. 1697 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri), 1698 ResultSuperReg) 1699 .addReg(SourceSuperReg) 1700 .addImm(8); 1701 1702 // Now reference the 8-bit subreg of the result. 
1703 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1704 TII.get(TargetOpcode::SUBREG_TO_REG)) 1705 .addDef(DstReg) 1706 .addImm(0) 1707 .addReg(ResultSuperReg) 1708 .addImm(X86::sub_8bit); 1709 } else { 1710 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), 1711 DstReg) 1712 .addReg(OpEntry.DivRemResultReg); 1713 } 1714 I.eraseFromParent(); 1715 return true; 1716} 1717 1718bool X86InstructionSelector::selectIntrinsicWSideEffects( 1719 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { 1720 1721 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && 1722 "unexpected instruction"); 1723 1724 if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap) 1725 return false; 1726 1727 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP)); 1728 1729 I.eraseFromParent(); 1730 return true; 1731} 1732 1733InstructionSelector * 1734llvm::createX86InstructionSelector(const X86TargetMachine &TM, 1735 X86Subtarget &Subtarget, 1736 X86RegisterBankInfo &RBI) { 1737 return new X86InstructionSelector(TM, Subtarget, RBI); 1738} 1739