AMDGPU.cpp revision 360784
1219089Spjd//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2219089Spjd// 3219089Spjd// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4219089Spjd// See https://llvm.org/LICENSE.txt for license information. 5219089Spjd// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6219089Spjd// 7219089Spjd//===----------------------------------------------------------------------===// 8219089Spjd// 9219089Spjd// This file implements AMDGPU TargetInfo objects. 10219089Spjd// 11219089Spjd//===----------------------------------------------------------------------===// 12219089Spjd 13219089Spjd#include "AMDGPU.h" 14219089Spjd#include "clang/Basic/Builtins.h" 15219089Spjd#include "clang/Basic/CodeGenOptions.h" 16219089Spjd#include "clang/Basic/LangOptions.h" 17219089Spjd#include "clang/Basic/MacroBuilder.h" 18219089Spjd#include "clang/Basic/TargetBuiltins.h" 19219089Spjd#include "llvm/ADT/StringSwitch.h" 20219089Spjd#include "llvm/IR/DataLayout.h" 21219089Spjd 22219089Spjdusing namespace clang; 23249643Smmusing namespace clang::targets; 24219089Spjd 25219089Spjdnamespace clang { 26219089Spjdnamespace targets { 27219089Spjd 28219089Spjd// If you edit the description strings, make sure you update 29219089Spjd// getPointerWidthV(). 30219089Spjd 31219089Spjdstatic const char *const DataLayoutStringR600 = 32219089Spjd "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 33219089Spjd "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; 34219089Spjd 35219089Spjdstatic const char *const DataLayoutStringAMDGCN = 36219089Spjd "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 37219089Spjd "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 38219089Spjd "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" 39219089Spjd "-ni:7"; 40219089Spjd 41219089Spjdconst LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42219089Spjd Generic, // Default 43219089Spjd Global, // opencl_global 44219089Spjd Local, // opencl_local 45219089Spjd Constant, // opencl_constant 46219089Spjd Private, // opencl_private 47219089Spjd Generic, // opencl_generic 48243674Smm Global, // cuda_device 49219089Spjd Constant, // cuda_constant 50219089Spjd Local, // cuda_shared 51219089Spjd Generic, // ptr32_sptr 52219089Spjd Generic, // ptr32_uptr 53219089Spjd Generic // ptr64 54219089Spjd}; 55219089Spjd 56219089Spjdconst LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 57219089Spjd Private, // Default 58249643Smm Global, // opencl_global 59219089Spjd Local, // opencl_local 60219089Spjd Constant, // opencl_constant 61243674Smm Private, // opencl_private 62243674Smm Generic, // opencl_generic 63243674Smm Global, // cuda_device 64243674Smm Constant, // cuda_constant 65219089Spjd Local, // cuda_shared 66243674Smm Generic, // ptr32_sptr 67243674Smm Generic, // ptr32_uptr 68243674Smm Generic // ptr64 69243674Smm 70219089Spjd}; 71219089Spjd} // namespace targets 72243674Smm} // namespace clang 73243674Smm 74243674Smmconst Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { 75243674Smm#define BUILTIN(ID, TYPE, ATTRS) \ 76243674Smm {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, 77243674Smm#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 78243674Smm {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, 79243674Smm#include "clang/Basic/BuiltinsAMDGPU.def" 80243674Smm}; 81243674Smm 82243674Smmconst char *const AMDGPUTargetInfo::GCCRegNames[] = { 83243674Smm "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 84243674Smm "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 85243674Smm "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 86243674Smm "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 87243674Smm "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 88243674Smm "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 89243674Smm "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 90243674Smm "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 91243674Smm "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 92243674Smm "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 93243674Smm "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 94243674Smm "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 95243674Smm "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 96243674Smm "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 97243674Smm "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 98243674Smm "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 99243674Smm "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 100243674Smm "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 101243674Smm "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 102219089Spjd "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 103219089Spjd "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 104219089Spjd "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 105219089Spjd "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 106219089Spjd "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 107219089Spjd "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 108219089Spjd "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 109219089Spjd "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 110219089Spjd "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 111219089Spjd "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 112219089Spjd "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 113219089Spjd "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 114219089Spjd "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 115219089Spjd "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 116219089Spjd "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 117219089Spjd "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 118219089Spjd "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 119219089Spjd "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 120219089Spjd "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 121219089Spjd "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 122219089Spjd "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 123219089Spjd "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 124219089Spjd "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 125219089Spjd "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 126219089Spjd "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 127219089Spjd "flat_scratch_lo", "flat_scratch_hi" 128250098Smm}; 129250098Smm 130250098SmmArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 131250098Smm return llvm::makeArrayRef(GCCRegNames); 132250098Smm} 133250098Smm 134250098Smmbool AMDGPUTargetInfo::initFeatureMap( 135263391Sdelphij llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 136250098Smm const std::vector<std::string> &FeatureVec) const { 137219089Spjd 138219089Spjd using namespace llvm::AMDGPU; 139219089Spjd 140219089Spjd // XXX - What does the member GPU mean if device name string passed here? 141219089Spjd if (isAMDGCN(getTriple())) { 142219089Spjd switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { 143219089Spjd case GK_GFX1012: 144219089Spjd case GK_GFX1011: 145219089Spjd Features["dot1-insts"] = true; 146219089Spjd Features["dot2-insts"] = true; 147219089Spjd Features["dot5-insts"] = true; 148219089Spjd Features["dot6-insts"] = true; 149219089Spjd LLVM_FALLTHROUGH; 150219089Spjd case GK_GFX1010: 151219089Spjd Features["dl-insts"] = true; 152219089Spjd Features["ci-insts"] = true; 153219089Spjd Features["flat-address-space"] = true; 154219089Spjd Features["16-bit-insts"] = true; 155219089Spjd Features["dpp"] = true; 156219089Spjd Features["gfx8-insts"] = true; 157219089Spjd Features["gfx9-insts"] = true; 158219089Spjd Features["gfx10-insts"] = true; 159219089Spjd Features["s-memrealtime"] = true; 160219089Spjd break; 161219089Spjd case GK_GFX908: 162219089Spjd Features["dot3-insts"] = true; 163219089Spjd Features["dot4-insts"] = true; 164219089Spjd Features["dot5-insts"] = true; 165219089Spjd Features["dot6-insts"] = true; 166219089Spjd LLVM_FALLTHROUGH; 167219089Spjd case GK_GFX906: 168219089Spjd Features["dl-insts"] = true; 169219089Spjd Features["dot1-insts"] = true; 170219089Spjd Features["dot2-insts"] = true; 171219089Spjd LLVM_FALLTHROUGH; 172219089Spjd case GK_GFX909: 173219089Spjd case GK_GFX904: 174219089Spjd case GK_GFX902: 175219089Spjd case GK_GFX900: 176219089Spjd Features["gfx9-insts"] = true; 177219089Spjd LLVM_FALLTHROUGH; 178219089Spjd case GK_GFX810: 179219089Spjd case GK_GFX803: 180219089Spjd case GK_GFX802: 181219089Spjd case GK_GFX801: 182219089Spjd Features["gfx8-insts"] = true; 183219089Spjd Features["16-bit-insts"] = true; 184219089Spjd Features["dpp"] = true; 185219089Spjd Features["s-memrealtime"] = true; 186219089Spjd LLVM_FALLTHROUGH; 187219089Spjd case GK_GFX704: 188219089Spjd case GK_GFX703: 189219089Spjd case GK_GFX702: 190219089Spjd case GK_GFX701: 191219089Spjd case GK_GFX700: 192219089Spjd Features["ci-insts"] = true; 193219089Spjd Features["flat-address-space"] = true; 194219089Spjd LLVM_FALLTHROUGH; 195219089Spjd case GK_GFX601: 196249643Smm case GK_GFX600: 197219089Spjd break; 198249643Smm case GK_NONE: 199219089Spjd break; 200219089Spjd default: 201249643Smm llvm_unreachable("Unhandled GPU!"); 202219089Spjd } 203219089Spjd } else { 204219089Spjd if (CPU.empty()) 205219089Spjd CPU = "r600"; 206219089Spjd 207249643Smm switch (llvm::AMDGPU::parseArchR600(CPU)) { 208219089Spjd case GK_CAYMAN: 209249643Smm case GK_CYPRESS: 210249643Smm case GK_RV770: 211219089Spjd case GK_RV670: 212219089Spjd // TODO: Add fp64 when implemented. 213219089Spjd break; 214219089Spjd case GK_TURKS: 215219089Spjd case GK_CAICOS: 216219089Spjd case GK_BARTS: 217219089Spjd case GK_SUMO: 218219089Spjd case GK_REDWOOD: 219219089Spjd case GK_JUNIPER: 220219089Spjd case GK_CEDAR: 221219089Spjd case GK_RV730: 222219089Spjd case GK_RV710: 223219089Spjd case GK_RS880: 224219089Spjd case GK_R630: 225219089Spjd case GK_R600: 226219089Spjd break; 227262086Savg default: 228219089Spjd llvm_unreachable("Unhandled GPU!"); 229219089Spjd } 230219089Spjd } 231219089Spjd 232219089Spjd return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); 233219089Spjd} 234219089Spjd 235219089Spjdvoid AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, 236219089Spjd TargetOptions &TargetOpts) const { 237219089Spjd bool hasFP32Denormals = false; 238219089Spjd bool hasFP64Denormals = false; 239219089Spjd 240219089Spjd for (auto &I : TargetOpts.FeaturesAsWritten) { 241219089Spjd if (I == "+fp32-denormals" || I == "-fp32-denormals") 242219089Spjd hasFP32Denormals = true; 243219089Spjd if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") 244219089Spjd hasFP64Denormals = true; 245219089Spjd } 246219089Spjd if (!hasFP32Denormals) 247219089Spjd TargetOpts.Features.push_back( 248219089Spjd (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm 249219089Spjd ? '+' : '-') + Twine("fp32-denormals")) 250219089Spjd .str()); 251219089Spjd // Always do not flush fp64 or fp16 denorms. 252219089Spjd if (!hasFP64Denormals && hasFP64()) 253219089Spjd TargetOpts.Features.push_back("+fp64-fp16-denormals"); 254219089Spjd} 255219089Spjd 256219089Spjdvoid AMDGPUTargetInfo::fillValidCPUList( 257219089Spjd SmallVectorImpl<StringRef> &Values) const { 258219089Spjd if (isAMDGCN(getTriple())) 259219089Spjd llvm::AMDGPU::fillValidArchListAMDGCN(Values); 260219089Spjd else 261219089Spjd llvm::AMDGPU::fillValidArchListR600(Values); 262219089Spjd} 263219089Spjd 264219089Spjdvoid AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 265219089Spjd AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 266219089Spjd} 267219089Spjd 268219089SpjdAMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 269219089Spjd const TargetOptions &Opts) 270249643Smm : TargetInfo(Triple), 271219089Spjd GPUKind(isAMDGCN(Triple) ? 272219089Spjd llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 273219089Spjd llvm::AMDGPU::parseArchR600(Opts.CPU)), 274219089Spjd GPUFeatures(isAMDGCN(Triple) ? 275219089Spjd llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 276219089Spjd llvm::AMDGPU::getArchAttrR600(GPUKind)) { 277219089Spjd resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 278219089Spjd : DataLayoutStringR600); 279219089Spjd assert(DataLayout->getAllocaAddrSpace() == Private); 280219089Spjd 281219089Spjd setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 282219089Spjd !isAMDGCN(Triple)); 283219089Spjd UseAddrSpaceMapMangling = true; 284219089Spjd 285219089Spjd HasLegalHalfType = true; 286219089Spjd HasFloat16 = true; 287219089Spjd 288219089Spjd // Set pointer width and alignment for target address space 0. 289219089Spjd PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); 290219089Spjd if (getMaxPointerWidth() == 64) { 291219089Spjd LongWidth = LongAlign = 64; 292219089Spjd SizeType = UnsignedLong; 293219089Spjd PtrDiffType = SignedLong; 294219089Spjd IntPtrType = SignedLong; 295219089Spjd } 296219089Spjd 297219089Spjd MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 298219089Spjd} 299219089Spjd 300219089Spjdvoid AMDGPUTargetInfo::adjust(LangOptions &Opts) { 301219089Spjd TargetInfo::adjust(Opts); 302219089Spjd // ToDo: There are still a few places using default address space as private 303219089Spjd // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL 304219089Spjd // can be removed from the following line. 305219089Spjd setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || 306219089Spjd !isAMDGCN(getTriple())); 307219089Spjd} 308219089Spjd 309219089SpjdArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 310219089Spjd return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin - 311219089Spjd Builtin::FirstTSBuiltin); 312219089Spjd} 313219089Spjd 314219089Spjdvoid AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 315219089Spjd MacroBuilder &Builder) const { 316219089Spjd Builder.defineMacro("__AMD__"); 317219089Spjd Builder.defineMacro("__AMDGPU__"); 318219089Spjd 319249643Smm if (isAMDGCN(getTriple())) 320219089Spjd Builder.defineMacro("__AMDGCN__"); 321219089Spjd else 322219089Spjd Builder.defineMacro("__R600__"); 323219089Spjd 324219089Spjd if (GPUKind != llvm::AMDGPU::GK_NONE) { 325219089Spjd StringRef CanonName = isAMDGCN(getTriple()) ? 326219089Spjd getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind); 327219089Spjd Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 328219089Spjd } 329219089Spjd 330219089Spjd // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 331219089Spjd // removed in the near future. 332219089Spjd if (hasFMAF()) 333219089Spjd Builder.defineMacro("__HAS_FMAF__"); 334219089Spjd if (hasFastFMAF()) 335219089Spjd Builder.defineMacro("FP_FAST_FMAF"); 336219089Spjd if (hasLDEXPF()) 337219089Spjd Builder.defineMacro("__HAS_LDEXPF__"); 338219089Spjd if (hasFP64()) 339219089Spjd Builder.defineMacro("__HAS_FP64__"); 340219089Spjd if (hasFastFMA()) 341219089Spjd Builder.defineMacro("FP_FAST_FMA"); 342219089Spjd} 343219089Spjd 344219089Spjdvoid AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 345219089Spjd assert(HalfFormat == Aux->HalfFormat); 346219089Spjd assert(FloatFormat == Aux->FloatFormat); 347219089Spjd assert(DoubleFormat == Aux->DoubleFormat); 348219089Spjd 349219089Spjd // On x86_64 long double is 80-bit extended precision format, which is 350219089Spjd // not supported by AMDGPU. 128-bit floating point format is also not 351219089Spjd // supported by AMDGPU. Therefore keep its own format for these two types. 352219089Spjd auto SaveLongDoubleFormat = LongDoubleFormat; 353219089Spjd auto SaveFloat128Format = Float128Format; 354219089Spjd copyAuxTarget(Aux); 355219089Spjd LongDoubleFormat = SaveLongDoubleFormat; 356219089Spjd Float128Format = SaveFloat128Format; 357249643Smm} 358219089Spjd