// X86TargetTransformInfo.h revision 360784
//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the X86 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

/// X86-specific implementation of the TargetTransformInfo query interface.
///
/// BasicTTIImplBase uses CRTP: it is parameterized on this class and calls
/// back into it (hence the \c friend declaration and the getST()/getTLI()
/// accessors below).  The cost-model overrides declared here are defined
/// out-of-line in X86TargetTransformInfo.cpp.
class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  // Per-function subtarget and lowering info, cached once by the constructor.
  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  // Accessors the CRTP base class (BasicTTIImplBase) calls to reach the
  // target-specific objects.
  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

  // Subtarget features that are tuning/codegen knobs rather than ABI- or
  // intrinsic-affecting capabilities.  NOTE(review): presumably this list is
  // consulted by areInlineCompatible() below to allow inlining across
  // functions whose feature strings differ only in these bits -- the actual
  // use is in the .cpp, not visible here.
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64 bit capable not that we are in 64-bit
      // mode.
      X86::Feature64Bit,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCMPXCHG16B,
      X86::FeatureLAHFSAHF,

      // Codegen control options.
      X86::FeatureFast11ByteNOP,
      X86::FeatureFast15ByteNOP,
      X86::FeatureFastBEXTR,
      X86::FeatureFastHorizontalOps,
      X86::FeatureFastLZCNT,
      X86::FeatureFastScalarFSQRT,
      X86::FeatureFastSHLDRotate,
      X86::FeatureFastScalarShiftMasks,
      X86::FeatureFastVectorShiftMasks,
      X86::FeatureFastVariableShuffle,
      X86::FeatureFastVectorFSQRT,
      X86::FeatureLEAForSP,
      X86::FeatureLEAUsesAG,
      X86::FeatureLZCNTFalseDeps,
      X86::FeatureBranchFusion,
      X86::FeatureMacroFusion,
      X86::FeatureMergeToThreeWayBranch,
      X86::FeaturePadShortFunctions,
      X86::FeaturePOPCNTFalseDeps,
      X86::FeatureSSEUnalignedMem,
      X86::FeatureSlow3OpsLEA,
      X86::FeatureSlowDivide32,
      X86::FeatureSlowDivide64,
      X86::FeatureSlowIncDec,
      X86::FeatureSlowLEA,
      X86::FeatureSlowPMADDWD,
      X86::FeatureSlowPMULLD,
      X86::FeatureSlowSHLD,
      X86::FeatureSlowTwoMemOps,
      X86::FeatureSlowUAMem16,
      X86::FeaturePreferMaskRegisters,
      X86::FeatureInsertVZEROUPPER,
      X86::FeatureUseGLMDivSqrtCosts,

      // Perf-tuning flags.
      X86::FeatureHasFastGather,
      X86::FeatureSlowUAMem32,

      // Based on whether user set the -mprefer-vector-width command line.
      X86::FeaturePrefer128Bit,
      X86::FeaturePrefer256Bit,

      // CPU name enums. These just follow CPU string.
      X86::ProcIntelAtom,
      X86::ProcIntelSLM,
  };

public:
  /// Construct the TTI implementation for function \p F.  The subtarget is
  /// looked up per function (via TM->getSubtargetImpl(F)), since features may
  /// differ between functions in the same module.
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  llvm::Optional<unsigned> getCacheSize(
    TargetTransformInfo::CacheLevel Level) const;
  llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  // Instruction-level cost queries.  The OperandValueKind/Properties
  // parameters let callers describe known operand facts (e.g. constant,
  // power-of-two) so the model can price cheaper sequences.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace);
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment);
  int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                const SCEV *Ptr);

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX);
  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1);

  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm);

  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned);

  // Interleaved-access cost: generic entry point plus ISA-specific variants.
  // NOTE(review): the AVX512/AVX2 variants look like dispatch targets of the
  // generic one -- the dispatch logic is in the .cpp, not visible here.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);

  // Immediate-materialization cost queries (used e.g. to decide whether to
  // hoist large constants).
  int getIntImmCost(int64_t);

  int getIntImmCost(const APInt &Imm, Type *Ty);

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty);
  // Target-capability predicates consumed by LSR, the vectorizers, and
  // related transforms.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment);
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment);
  bool isLegalNTLoad(Type *DataType, Align Alignment);
  bool isLegalNTStore(Type *DataType, Align Alignment);
  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment);
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  // Inliner hooks: whether a call from \p Caller to \p Callee is compatible
  // for inlining given their (possibly different) subtarget features.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool enableInterleavedAccessVectorization();
private:
  // Gather/scatter cost helpers: scalarized-emulation cost vs. cost of the
  // native vector instruction sequence (definitions in the .cpp).
  int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
                      unsigned Alignment, unsigned AddressSpace);
  int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                      unsigned Alignment, unsigned AddressSpace);

  /// @}
};

} // end namespace llvm

#endif
