AArch64Subtarget.cpp revision 360784
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64CallLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"
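
// GET_SUBTARGETINFO_CTOR pulls in the TableGen-generated
// AArch64GenSubtargetInfo constructor, and GET_SUBTARGETINFO_TARGET_DESC pulls
// in the processor and feature description tables it needs.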
#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that the top byte "
                         "of an address is ignored"), cl::init(false),
                         cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);
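
// These cl::opt flags are ordinary LLVM command-line options, so they can be
// passed to tools such as llc. Illustrative invocation (not from this file):
//   llc -mtriple=aarch64-linux-gnu -aarch64-use-tbi input.ll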

AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString) {
  // Determine default and user-specified characteristics.

  if (CPUString.empty())
    CPUString = "generic";

  ParseSubtargetFeatures(CPUString, FS);
  initializeProperties();

  return *this;
}
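
// Note: the constructor below calls initializeSubtargetDependencies() from its
// member-initializer list (via the InstrInfo member), so the feature bits are
// parsed before the TargetLowering (TLInfo) member is constructed from this
// subtarget.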

void AArch64Subtarget::initializeProperties() {
  // Initialize CPU-specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
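  // The Pref*LogAlignment fields are log2 of a byte alignment; for example,
  // PrefFunctionLogAlignment = 4 requests 16-byte function alignment.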
  switch (ARMProcFamily) {
  case Others:
    break;
  case CortexA35:
    break;
  case CortexA53:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA55:
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;
    break;
  case CortexA65:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
  case CortexA76:
    PrefFunctionLogAlignment = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionLogAlignment = 3;
    break;
  case NeoverseN1:
    PrefFunctionLogAlignment = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    break;
  }
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian)
    : AArch64GenSubtargetInfo(TT, CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      TargetTriple(TT), FrameLowering(),
      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);
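  // (X18 is reserved by default on targets such as Darwin and Windows, where
  // the platform ABI treats it as the platform register.)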

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // The MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass())
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // The same applies to the tiny code model, where we have a PC-relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}
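
// Illustrative example (not from this file): an external, non-DSO-local global
// classified as MO_GOT is typically materialized in the small code model as a
// GOT load:
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]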

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // The MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via the GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
  if (getTargetTriple().isOSWindows())
    return ClassifyGlobalReference(GV, TM);

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // An LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it helps
  // almost no benchmark on out-of-order architectures, while it regresses
  // register pressure on a few of them.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}
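
// TBI ("top byte ignore") is an AArch64 feature: the top eight bits of a
// 64-bit virtual address are ignored for address translation, leaving them
// free to carry metadata such as pointer tags. As the check below assumes,
// iOS 8 and later run userspace code with TBI enabled.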
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isiOS()) {
    unsigned Major, Minor, Micro;
    TargetTriple.getiOSVersion(Major, Minor, Micro);
    return Major >= 8;
  }

  return false;
}
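
// If this subtarget asks for balanced FP ops (a tuning flag set for, e.g.,
// Cortex-A57), install the A57 chaining constraint in the PBQP register
// allocator; see AArch64PBQPRegAlloc.h.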
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute the max call frame size after ISel. Do the computation
  // now if the .mir file didn't specify it. Note that this will probably give
  // you bogus values after PEI has eliminated the callframe setup/destroy
  // pseudo instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}
308