AMDGPU.cpp revision 360784
1219089Spjd//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2219089Spjd//
3219089Spjd// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4219089Spjd// See https://llvm.org/LICENSE.txt for license information.
5219089Spjd// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6219089Spjd//
7219089Spjd//===----------------------------------------------------------------------===//
8219089Spjd//
9219089Spjd// This file implements AMDGPU TargetInfo objects.
10219089Spjd//
11219089Spjd//===----------------------------------------------------------------------===//
12219089Spjd
13219089Spjd#include "AMDGPU.h"
14219089Spjd#include "clang/Basic/Builtins.h"
15219089Spjd#include "clang/Basic/CodeGenOptions.h"
16219089Spjd#include "clang/Basic/LangOptions.h"
17219089Spjd#include "clang/Basic/MacroBuilder.h"
18219089Spjd#include "clang/Basic/TargetBuiltins.h"
19219089Spjd#include "llvm/ADT/StringSwitch.h"
20219089Spjd#include "llvm/IR/DataLayout.h"
21219089Spjd
22219089Spjdusing namespace clang;
23249643Smmusing namespace clang::targets;
24219089Spjd
25219089Spjdnamespace clang {
26219089Spjdnamespace targets {
27219089Spjd
28219089Spjd// If you edit the description strings, make sure you update
29219089Spjd// getPointerWidthV().
30219089Spjd
31219089Spjdstatic const char *const DataLayoutStringR600 =
32219089Spjd    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33219089Spjd    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34219089Spjd
35219089Spjdstatic const char *const DataLayoutStringAMDGCN =
36219089Spjd    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37219089Spjd    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38219089Spjd    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
39219089Spjd    "-ni:7";
40219089Spjd
41219089Spjdconst LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42219089Spjd    Generic,  // Default
43219089Spjd    Global,   // opencl_global
44219089Spjd    Local,    // opencl_local
45219089Spjd    Constant, // opencl_constant
46219089Spjd    Private,  // opencl_private
47219089Spjd    Generic,  // opencl_generic
48243674Smm    Global,   // cuda_device
49219089Spjd    Constant, // cuda_constant
50219089Spjd    Local,    // cuda_shared
51219089Spjd    Generic,  // ptr32_sptr
52219089Spjd    Generic,  // ptr32_uptr
53219089Spjd    Generic   // ptr64
54219089Spjd};
55219089Spjd
56219089Spjdconst LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57219089Spjd    Private,  // Default
58249643Smm    Global,   // opencl_global
59219089Spjd    Local,    // opencl_local
60219089Spjd    Constant, // opencl_constant
61243674Smm    Private,  // opencl_private
62243674Smm    Generic,  // opencl_generic
63243674Smm    Global,   // cuda_device
64243674Smm    Constant, // cuda_constant
65219089Spjd    Local,    // cuda_shared
66243674Smm    Generic,  // ptr32_sptr
67243674Smm    Generic,  // ptr32_uptr
68243674Smm    Generic   // ptr64
69243674Smm
70219089Spjd};
71219089Spjd} // namespace targets
72243674Smm} // namespace clang
73243674Smm
74243674Smmconst Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75243674Smm#define BUILTIN(ID, TYPE, ATTRS)                                               \
76243674Smm  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77243674Smm#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
78243674Smm  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79243674Smm#include "clang/Basic/BuiltinsAMDGPU.def"
80243674Smm};
81243674Smm
82243674Smmconst char *const AMDGPUTargetInfo::GCCRegNames[] = {
83243674Smm  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84243674Smm  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85243674Smm  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86243674Smm  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87243674Smm  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88243674Smm  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89243674Smm  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90243674Smm  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91243674Smm  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92243674Smm  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93243674Smm  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94243674Smm  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95243674Smm  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96243674Smm  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97243674Smm  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98243674Smm  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99243674Smm  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100243674Smm  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101243674Smm  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102219089Spjd  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103219089Spjd  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104219089Spjd  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105219089Spjd  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106219089Spjd  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107219089Spjd  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108219089Spjd  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109219089Spjd  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110219089Spjd  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111219089Spjd  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112219089Spjd  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113219089Spjd  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114219089Spjd  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115219089Spjd  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116219089Spjd  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117219089Spjd  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118219089Spjd  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119219089Spjd  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120219089Spjd  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121219089Spjd  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122219089Spjd  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123219089Spjd  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124219089Spjd  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125219089Spjd  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126219089Spjd  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127219089Spjd  "flat_scratch_lo", "flat_scratch_hi"
128250098Smm};
129250098Smm
130250098SmmArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131250098Smm  return llvm::makeArrayRef(GCCRegNames);
132250098Smm}
133250098Smm
134250098Smmbool AMDGPUTargetInfo::initFeatureMap(
135263391Sdelphij    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136250098Smm    const std::vector<std::string> &FeatureVec) const {
137219089Spjd
138219089Spjd  using namespace llvm::AMDGPU;
139219089Spjd
140219089Spjd  // XXX - What does the member GPU mean if device name string passed here?
141219089Spjd  if (isAMDGCN(getTriple())) {
142219089Spjd    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143219089Spjd    case GK_GFX1012:
144219089Spjd    case GK_GFX1011:
145219089Spjd      Features["dot1-insts"] = true;
146219089Spjd      Features["dot2-insts"] = true;
147219089Spjd      Features["dot5-insts"] = true;
148219089Spjd      Features["dot6-insts"] = true;
149219089Spjd      LLVM_FALLTHROUGH;
150219089Spjd    case GK_GFX1010:
151219089Spjd      Features["dl-insts"] = true;
152219089Spjd      Features["ci-insts"] = true;
153219089Spjd      Features["flat-address-space"] = true;
154219089Spjd      Features["16-bit-insts"] = true;
155219089Spjd      Features["dpp"] = true;
156219089Spjd      Features["gfx8-insts"] = true;
157219089Spjd      Features["gfx9-insts"] = true;
158219089Spjd      Features["gfx10-insts"] = true;
159219089Spjd      Features["s-memrealtime"] = true;
160219089Spjd      break;
161219089Spjd    case GK_GFX908:
162219089Spjd      Features["dot3-insts"] = true;
163219089Spjd      Features["dot4-insts"] = true;
164219089Spjd      Features["dot5-insts"] = true;
165219089Spjd      Features["dot6-insts"] = true;
166219089Spjd      LLVM_FALLTHROUGH;
167219089Spjd    case GK_GFX906:
168219089Spjd      Features["dl-insts"] = true;
169219089Spjd      Features["dot1-insts"] = true;
170219089Spjd      Features["dot2-insts"] = true;
171219089Spjd      LLVM_FALLTHROUGH;
172219089Spjd    case GK_GFX909:
173219089Spjd    case GK_GFX904:
174219089Spjd    case GK_GFX902:
175219089Spjd    case GK_GFX900:
176219089Spjd      Features["gfx9-insts"] = true;
177219089Spjd      LLVM_FALLTHROUGH;
178219089Spjd    case GK_GFX810:
179219089Spjd    case GK_GFX803:
180219089Spjd    case GK_GFX802:
181219089Spjd    case GK_GFX801:
182219089Spjd      Features["gfx8-insts"] = true;
183219089Spjd      Features["16-bit-insts"] = true;
184219089Spjd      Features["dpp"] = true;
185219089Spjd      Features["s-memrealtime"] = true;
186219089Spjd      LLVM_FALLTHROUGH;
187219089Spjd    case GK_GFX704:
188219089Spjd    case GK_GFX703:
189219089Spjd    case GK_GFX702:
190219089Spjd    case GK_GFX701:
191219089Spjd    case GK_GFX700:
192219089Spjd      Features["ci-insts"] = true;
193219089Spjd      Features["flat-address-space"] = true;
194219089Spjd      LLVM_FALLTHROUGH;
195219089Spjd    case GK_GFX601:
196249643Smm    case GK_GFX600:
197219089Spjd      break;
198249643Smm    case GK_NONE:
199219089Spjd      break;
200219089Spjd    default:
201249643Smm      llvm_unreachable("Unhandled GPU!");
202219089Spjd    }
203219089Spjd  } else {
204219089Spjd    if (CPU.empty())
205219089Spjd      CPU = "r600";
206219089Spjd
207249643Smm    switch (llvm::AMDGPU::parseArchR600(CPU)) {
208219089Spjd    case GK_CAYMAN:
209249643Smm    case GK_CYPRESS:
210249643Smm    case GK_RV770:
211219089Spjd    case GK_RV670:
212219089Spjd      // TODO: Add fp64 when implemented.
213219089Spjd      break;
214219089Spjd    case GK_TURKS:
215219089Spjd    case GK_CAICOS:
216219089Spjd    case GK_BARTS:
217219089Spjd    case GK_SUMO:
218219089Spjd    case GK_REDWOOD:
219219089Spjd    case GK_JUNIPER:
220219089Spjd    case GK_CEDAR:
221219089Spjd    case GK_RV730:
222219089Spjd    case GK_RV710:
223219089Spjd    case GK_RS880:
224219089Spjd    case GK_R630:
225219089Spjd    case GK_R600:
226219089Spjd      break;
227262086Savg    default:
228219089Spjd      llvm_unreachable("Unhandled GPU!");
229219089Spjd    }
230219089Spjd  }
231219089Spjd
232219089Spjd  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233219089Spjd}
234219089Spjd
235219089Spjdvoid AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236219089Spjd                                           TargetOptions &TargetOpts) const {
237219089Spjd  bool hasFP32Denormals = false;
238219089Spjd  bool hasFP64Denormals = false;
239219089Spjd
240219089Spjd  for (auto &I : TargetOpts.FeaturesAsWritten) {
241219089Spjd    if (I == "+fp32-denormals" || I == "-fp32-denormals")
242219089Spjd      hasFP32Denormals = true;
243219089Spjd    if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244219089Spjd      hasFP64Denormals = true;
245219089Spjd  }
246219089Spjd  if (!hasFP32Denormals)
247219089Spjd    TargetOpts.Features.push_back(
248219089Spjd      (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249219089Spjd             ? '+' : '-') + Twine("fp32-denormals"))
250219089Spjd            .str());
251219089Spjd  // Always do not flush fp64 or fp16 denorms.
252219089Spjd  if (!hasFP64Denormals && hasFP64())
253219089Spjd    TargetOpts.Features.push_back("+fp64-fp16-denormals");
254219089Spjd}
255219089Spjd
256219089Spjdvoid AMDGPUTargetInfo::fillValidCPUList(
257219089Spjd    SmallVectorImpl<StringRef> &Values) const {
258219089Spjd  if (isAMDGCN(getTriple()))
259219089Spjd    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
260219089Spjd  else
261219089Spjd    llvm::AMDGPU::fillValidArchListR600(Values);
262219089Spjd}
263219089Spjd
264219089Spjdvoid AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265219089Spjd  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
266219089Spjd}
267219089Spjd
268219089SpjdAMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269219089Spjd                                   const TargetOptions &Opts)
270249643Smm    : TargetInfo(Triple),
271219089Spjd      GPUKind(isAMDGCN(Triple) ?
272219089Spjd              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273219089Spjd              llvm::AMDGPU::parseArchR600(Opts.CPU)),
274219089Spjd      GPUFeatures(isAMDGCN(Triple) ?
275219089Spjd                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276219089Spjd                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
277219089Spjd  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
278219089Spjd                                        : DataLayoutStringR600);
279219089Spjd  assert(DataLayout->getAllocaAddrSpace() == Private);
280219089Spjd
281219089Spjd  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
282219089Spjd                     !isAMDGCN(Triple));
283219089Spjd  UseAddrSpaceMapMangling = true;
284219089Spjd
285219089Spjd  HasLegalHalfType = true;
286219089Spjd  HasFloat16 = true;
287219089Spjd
288219089Spjd  // Set pointer width and alignment for target address space 0.
289219089Spjd  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
290219089Spjd  if (getMaxPointerWidth() == 64) {
291219089Spjd    LongWidth = LongAlign = 64;
292219089Spjd    SizeType = UnsignedLong;
293219089Spjd    PtrDiffType = SignedLong;
294219089Spjd    IntPtrType = SignedLong;
295219089Spjd  }
296219089Spjd
297219089Spjd  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
298219089Spjd}
299219089Spjd
300219089Spjdvoid AMDGPUTargetInfo::adjust(LangOptions &Opts) {
301219089Spjd  TargetInfo::adjust(Opts);
302219089Spjd  // ToDo: There are still a few places using default address space as private
303219089Spjd  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304219089Spjd  // can be removed from the following line.
305219089Spjd  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306219089Spjd                     !isAMDGCN(getTriple()));
307219089Spjd}
308219089Spjd
309219089SpjdArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
310219089Spjd  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
311219089Spjd                                             Builtin::FirstTSBuiltin);
312219089Spjd}
313219089Spjd
314219089Spjdvoid AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
315219089Spjd                                        MacroBuilder &Builder) const {
316219089Spjd  Builder.defineMacro("__AMD__");
317219089Spjd  Builder.defineMacro("__AMDGPU__");
318219089Spjd
319249643Smm  if (isAMDGCN(getTriple()))
320219089Spjd    Builder.defineMacro("__AMDGCN__");
321219089Spjd  else
322219089Spjd    Builder.defineMacro("__R600__");
323219089Spjd
324219089Spjd  if (GPUKind != llvm::AMDGPU::GK_NONE) {
325219089Spjd    StringRef CanonName = isAMDGCN(getTriple()) ?
326219089Spjd      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327219089Spjd    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
328219089Spjd  }
329219089Spjd
330219089Spjd  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331219089Spjd  // removed in the near future.
332219089Spjd  if (hasFMAF())
333219089Spjd    Builder.defineMacro("__HAS_FMAF__");
334219089Spjd  if (hasFastFMAF())
335219089Spjd    Builder.defineMacro("FP_FAST_FMAF");
336219089Spjd  if (hasLDEXPF())
337219089Spjd    Builder.defineMacro("__HAS_LDEXPF__");
338219089Spjd  if (hasFP64())
339219089Spjd    Builder.defineMacro("__HAS_FP64__");
340219089Spjd  if (hasFastFMA())
341219089Spjd    Builder.defineMacro("FP_FAST_FMA");
342219089Spjd}
343219089Spjd
344219089Spjdvoid AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
345219089Spjd  assert(HalfFormat == Aux->HalfFormat);
346219089Spjd  assert(FloatFormat == Aux->FloatFormat);
347219089Spjd  assert(DoubleFormat == Aux->DoubleFormat);
348219089Spjd
349219089Spjd  // On x86_64 long double is 80-bit extended precision format, which is
350219089Spjd  // not supported by AMDGPU. 128-bit floating point format is also not
351219089Spjd  // supported by AMDGPU. Therefore keep its own format for these two types.
352219089Spjd  auto SaveLongDoubleFormat = LongDoubleFormat;
353219089Spjd  auto SaveFloat128Format = Float128Format;
354219089Spjd  copyAuxTarget(Aux);
355219089Spjd  LongDoubleFormat = SaveLongDoubleFormat;
356219089Spjd  Float128Format = SaveFloat128Format;
357249643Smm}
358219089Spjd