//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Base class for every instruction in the "AMDGPU" namespace.
//
//   outs    - output operand list (forwarded to OutOperandList)
//   ins     - input operand list (forwarded to InOperandList)
//   asm     - assembly string (forwarded to AsmString)
//   pattern - selection patterns (forwarded to Pattern)
//
// The isRegisterLoad / isRegisterStore bits default to 0 and are exported to
// C++ through TSFlags bits 63 and 62 respectively.
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// AMDGPUInst with a 32-bit Inst field preset to all ones. Takes the same
// template arguments as AMDGPUInst and forwards them unchanged.
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;

}

// i32 operand whose default value is 0.
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

// Two-operand address pattern; matching is done by the C++ function
// SelectADDRIndirect.
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each of the following six leaves accepts the explicitly ordered/unordered
// condition code as well as the corresponding "don't care" code (e.g. both
// SETOEQ and SETEQ).

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_UNE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned comparisons
//===----------------------------------------------------------------------===//

def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

// Condition leaf whose predicate always returns false, so it never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{return false;}]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// Naming scheme used below:
//   az_ext*     - load whose extension type is ZEXTLOAD or EXTLOAD
//   sext*       - built on the sextloadi8 / sextloadi16 fragments
//   *_global / *_constant / *_local
//               - additionally filtered by the isGlobal*/isConstant*/isLocal*
//                 predicates, which are defined outside this file.
//
// NOTE(review): those predicates are handed the result of dyn_cast<>; whether
// they tolerate a null pointer is not visible from this file, so the casts in
// those fragments are left as they were.

def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

// --- 8-bit loads ---

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// --- 16-bit loads ---

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// --- 32-bit loads ---

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : PatFrag<(ops node:$ptr),
                                   (az_extloadi32 node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;

def az_extloadi32_constant : PatFrag<(ops node:$ptr),
                                     (az_extloadi32 node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

// --- Stores ---

def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
                                  (truncstorei8 node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
                                   (truncstorei16 node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

def local_store : PatFrag<(ops node:$val, node:$ptr),
                          (store node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstorei8 node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
                                  (truncstorei16 node:$val, node:$ptr), [{
  return isLocalStore(dyn_cast<StoreSDNode>(N));
}]>;

def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;

// --- Address-space filtered atomics and masked-or store ---
//
// The three fragments below dereference the casted node unconditionally, so
// they use cast<> rather than dyn_cast<>: cast<> asserts on a type mismatch
// instead of yielding a null pointer that would then be dereferenced.

def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
                                    (atomic_load_add node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
                                    (atomic_load_sub node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Named integer constants holding IEEE-754 single-precision bit patterns.
// Accessed through the CONST record defined right after the class.
class Constants {
int TWO_PI = 0x40c90fdb;             // 2 * pi
int PI = 0x40490fdb;                 // pi
int TWO_PI_INV = 0x3e22f983;         // 1 / (2 * pi)
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;

// Floating-point immediate whose APFloat value is zero.
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

// Floating-point immediate that is exactly 1.0.
def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

// i32 operand patterns matched by the C++ functions SelectU24 / SelectI24.
def U24 : ComplexPattern<i32, 1, "SelectU24", [], []>;
def I24 : ComplexPattern<i32, 1, "SelectI24", [], []>;

// Everything up to the matching closing brace is a code-gen-only pseudo
// instruction.
let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

// The three classes below take the register class used for both the result
// and the source operand. Note that their selection patterns are written in
// terms of f32 independently of rc.

// Matches int_AMDIL_clamp with bounds (FP_ZERO, FP_ONE).
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

// Matches fabs.
class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

// Matches fneg.
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

// Defines a <prefix>RegisterLoad / <prefix>RegisterStore pseudo pair.
//
//   dstClass  - register class of the loaded / stored value
//   addrClass - operand type of the address
//   addrPat   - ComplexPattern used to match the address
//
// Both instructions carry an extra i32 immediate $chan operand and set the
// corresponding isRegisterLoad / isRegisterStore bit.
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                              ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
} // End UseNamedOperandTable = 1
} // End multiclass RegisterLoadStore

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// Rewrites fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))) using the
// three instructions supplied by the instantiating target.
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
// vector_extract of element sub_idx becomes EXTRACT_SUBREG with sub_reg.
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (vector_extract vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
// vector_insert at element sub_idx becomes INSERT_SUBREG with sub_reg.
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// Four-element build_vector: starts from IMPLICIT_DEF and inserts $x, $y, $z,
// $w into sub0..sub3 in that order.
class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
  (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
    (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
// A bitconvert from st to dt within register class rc selects to the source
// register itself, retyped as dt.
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// AMDGPUdwordaddr applied to a register of class rc selects to that same
// register; the value type vt is kept on both sides.
class DwordAddrPat<ValueType vt, RegisterClass rc>
  : Pat <(vt (AMDGPUdwordaddr (vt rc:$ptr))), (vt rc:$ptr)>;

//===----------------------------------------------------------------------===//
// BFI_INT patterns
//===----------------------------------------------------------------------===//

// Instantiated with the target's BFI_INT instruction; emits two anonymous
// patterns that both select to (BFI_INT x, y, z).
multiclass BFIPatterns <Instruction BFI_INT> {

  // Form given in the ISA documentation:
  //   (y & x) | (z & ~x)
  def : Pat <(or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
             (BFI_INT $x, $y, $z)>;

  // The SHA-256 "Ch" function:
  //   z ^ (x & (y ^ z))
  def : Pat <(xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
             (BFI_INT $x, $y, $z)>;

}

//===----------------------------------------------------------------------===//
// SHA-256 Ma patterns
//===----------------------------------------------------------------------===//

// ((x & z) | (y & (x | z)))  ->  BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR>
  : Pat <
      (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
      (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
    >;

//===----------------------------------------------------------------------===//
// Bitfield extract patterns
//===----------------------------------------------------------------------===//

/*

XXX: Disabled. The BFE pattern does not work correctly because the XForm is
not being applied.

def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
                            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;

class BFEPattern <Instruction BFE> : Pat <
  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
  (BFE $x, $y, $z)
>;

*/

//===----------------------------------------------------------------------===//
// rotr pattern
//===----------------------------------------------------------------------===//

// rotr(val, amt) selects to BIT_ALIGN with val supplied as both of the first
// two operands and amt as the third.
class ROTRPattern <Instruction BIT_ALIGN>
  : Pat <(rotr i32:$val, i32:$amt), (BIT_ALIGN $val, $val, $amt)>;

//===----------------------------------------------------------------------===//
// 24-bit arithmetic patterns
//===----------------------------------------------------------------------===//

// Currently a plain alias for mul: the fragment carries no predicate.
def umul24 : PatFrag <
  (ops node:$lhs, node:$rhs),
  (mul node:$lhs, node:$rhs)
>;

/*
class UMUL24Pattern <Instruction UMUL24> : Pat <
  (mul U24:$x, U24:$y),
  (UMUL24 $x, $y)
>;
*/

include "R600Instructions.td"

include "SIInstrInfo.td"